Support json.Specification.MJSON

https://bitsquid.blogspot.com/2009/10/simplified-json-notation.html
This commit is contained in:
gingerBill
2021-09-28 14:53:16 +01:00
parent 6f872e04c8
commit c933054872
5 changed files with 204 additions and 176 deletions

View File

@@ -14,6 +14,9 @@ Parser :: struct {
}
make_parser :: proc(data: []byte, spec := DEFAULT_SPECIFICATION, parse_integers := false, allocator := context.allocator) -> Parser {
return make_parser_from_string(string(data), spec, parse_integers, allocator)
}
make_parser_from_string :: proc(data: string, spec := DEFAULT_SPECIFICATION, parse_integers := false, allocator := context.allocator) -> Parser {
p: Parser
p.tok = make_tokenizer(data, spec, parse_integers)
p.spec = spec
@@ -23,11 +26,25 @@ make_parser :: proc(data: []byte, spec := DEFAULT_SPECIFICATION, parse_integers
return p
}
parse :: proc(data: []byte, spec := DEFAULT_SPECIFICATION, parse_integers := false, allocator := context.allocator) -> (Value, Error) {
context.allocator = allocator
p := make_parser(data, spec, parse_integers, allocator)
if p.spec == Specification.JSON5 {
parse :: proc(data: []byte, spec := DEFAULT_SPECIFICATION, parse_integers := false, allocator := context.allocator) -> (Value, Error) {
return parse_string(string(data), spec, parse_integers, allocator)
}
parse_string :: proc(data: string, spec := DEFAULT_SPECIFICATION, parse_integers := false, allocator := context.allocator) -> (Value, Error) {
context.allocator = allocator
p := make_parser_from_string(data, spec, parse_integers, allocator)
switch p.spec {
case .JSON:
return parse_object(&p)
case .JSON5:
return parse_value(&p)
case .MJSON:
#partial switch p.curr_token.kind {
case .Ident, .String:
return parse_object_body(&p, .EOF)
}
return parse_value(&p)
}
return parse_object(&p)
@@ -59,12 +76,34 @@ expect_token :: proc(p: ^Parser, kind: Token_Kind) -> Error {
prev := p.curr_token
advance_token(p)
if prev.kind == kind {
return .None
return nil
}
return .Unexpected_Token
}
// Consume the ':' that separates an object key from its value.
// Any token mismatch is reported as the key-specific error so callers
// get a more useful diagnostic than a bare .Unexpected_Token.
parse_colon :: proc(p: ^Parser) -> (err: Error) {
	if expect_token(p, .Colon) != nil {
		return .Expected_Colon_After_Key
	}
	return nil
}
// parse_comma consumes an optional ',' between elements of an object or
// array body and reports whether the caller's loop should stop.
// do_break is true only under strict JSON when the separating comma is
// missing; JSON5 and MJSON never break here.
parse_comma :: proc(p: ^Parser) -> (do_break: bool) {
	switch p.spec {
	case .JSON5, .MJSON:
		// Trailing commas are allowed: the `if` exists purely for
		// allow_token's side effect of consuming the comma when present.
		if allow_token(p, .Comma) {
			return false
		}
		return false
	case .JSON:
		// Strict JSON: elements must be comma-separated, and a missing
		// comma terminates the sequence (trailing commas disallowed).
		if !allow_token(p, .Comma) {
			return true
		}
	}
	return false
}
parse_value :: proc(p: ^Parser) -> (value: Value, err: Error) {
token := p.curr_token
@@ -102,9 +141,15 @@ parse_value :: proc(p: ^Parser) -> (value: Value, err: Error) {
case .Open_Bracket:
return parse_array(p)
case .Ident:
if p.spec == .MJSON {
advance_token(p)
return string(token.text), nil
}
case:
if p.spec == Specification.JSON5 {
if p.spec != .JSON {
#partial switch token.kind {
case .Infinity:
inf: u64 = 0x7ff0000000000000
@@ -136,7 +181,7 @@ parse_array :: proc(p: ^Parser) -> (value: Value, err: Error) {
array: Array
array.allocator = p.allocator
defer if err != .None {
defer if err != nil {
for elem in array {
destroy_value(elem)
}
@@ -146,11 +191,8 @@ parse_array :: proc(p: ^Parser) -> (value: Value, err: Error) {
for p.curr_token.kind != .Close_Bracket {
elem := parse_value(p) or_return
append(&array, elem)
// Disallow trailing commas for the time being
if allow_token(p, .Comma) {
continue
} else {
if parse_comma(p) {
break
}
}
@@ -187,31 +229,21 @@ clone_string :: proc(s: string, allocator: mem.Allocator) -> (str: string, err:
parse_object_key :: proc(p: ^Parser, key_allocator: mem.Allocator) -> (key: string, err: Error) {
tok := p.curr_token
if p.spec == Specification.JSON5 {
if tok.kind == .String {
expect_token(p, .String)
key = unquote_string(tok, p.spec, key_allocator) or_return
return
} else if tok.kind == .Ident {
expect_token(p, .Ident)
key = clone_string(tok.text, key_allocator) or_return
return
if p.spec != .JSON {
if allow_token(p, .Ident) {
return clone_string(tok.text, key_allocator)
}
}
if tok_err := expect_token(p, .String); tok_err != .None {
if tok_err := expect_token(p, .String); tok_err != nil {
err = .Expected_String_For_Object_Key
return
}
key = unquote_string(tok, p.spec, key_allocator) or_return
return
return unquote_string(tok, p.spec, key_allocator)
}
parse_object :: proc(p: ^Parser) -> (value: Value, err: Error) {
expect_token(p, .Open_Brace) or_return
obj: Object
parse_object_body :: proc(p: ^Parser, end_token: Token_Kind) -> (obj: Object, err: Error) {
obj.allocator = p.allocator
defer if err != .None {
defer if err != nil {
for key, elem in obj {
delete(key, p.allocator)
destroy_value(elem)
@@ -219,19 +251,9 @@ parse_object :: proc(p: ^Parser) -> (value: Value, err: Error) {
delete(obj)
}
for p.curr_token.kind != .Close_Brace {
key: string
key, err = parse_object_key(p, p.allocator)
if err != .None {
delete(key, p.allocator)
return
}
if colon_err := expect_token(p, .Colon); colon_err != .None {
err = .Expected_Colon_After_Key
return
}
for p.curr_token.kind != end_token {
key := parse_object_key(p, p.allocator) or_return
parse_colon(p) or_return
elem := parse_value(p) or_return
if key in obj {
@@ -241,22 +263,17 @@ parse_object :: proc(p: ^Parser) -> (value: Value, err: Error) {
}
obj[key] = elem
if p.spec == Specification.JSON5 {
// Allow trailing commas
if allow_token(p, .Comma) {
continue
}
} else {
// Disallow trailing commas
if allow_token(p, .Comma) {
continue
} else {
break
}
if parse_comma(p) {
break
}
}
}
return
}
parse_object :: proc(p: ^Parser) -> (value: Value, err: Error) {
expect_token(p, .Open_Brace) or_return
obj := parse_object_body(p, .Close_Brace) or_return
expect_token(p, .Close_Brace) or_return
value = obj
return
@@ -387,7 +404,7 @@ unquote_string :: proc(token: Token, spec: Specification, allocator := context.a
case '0':
if spec == Specification.JSON5 {
if spec != .JSON {
b[w] = '\x00'
i += 1
w += 1
@@ -395,7 +412,7 @@ unquote_string :: proc(token: Token, spec: Specification, allocator := context.a
break loop
}
case 'v':
if spec == Specification.JSON5 {
if spec != .JSON {
b[w] = '\v'
i += 1
w += 1
@@ -404,7 +421,7 @@ unquote_string :: proc(token: Token, spec: Specification, allocator := context.a
}
case 'x':
if spec == Specification.JSON5 {
if spec != .JSON {
i -= 1 // Include the \x in the check for sanity sake
r := get_u2_rune(s[i:])
if r < 0 {

View File

@@ -43,17 +43,18 @@ Token_Kind :: enum {
Tokenizer :: struct {
using pos: Pos,
data: []byte,
data: string,
r: rune, // current rune
w: int, // current rune width in bytes
curr_line_offset: int,
spec: Specification,
parse_integers: bool,
insert_comma: bool,
}
make_tokenizer :: proc(data: []byte, spec := DEFAULT_SPECIFICATION, parse_integers := false) -> Tokenizer {
make_tokenizer :: proc(data: string, spec := DEFAULT_SPECIFICATION, parse_integers := false) -> Tokenizer {
t := Tokenizer{pos = {line=1}, data = data, spec = spec, parse_integers = parse_integers}
next_rune(&t)
if t.r == utf8.RUNE_BOM {
@@ -64,11 +65,15 @@ make_tokenizer :: proc(data: []byte, spec := DEFAULT_SPECIFICATION, parse_intege
next_rune :: proc(t: ^Tokenizer) -> rune #no_bounds_check {
if t.offset >= len(t.data) {
return utf8.RUNE_EOF
t.r = utf8.RUNE_EOF
} else {
t.offset += t.w
t.r, t.w = utf8.decode_rune_in_string(t.data[t.offset:])
t.pos.column = t.offset - t.curr_line_offset
if t.offset >= len(t.data) {
t.r = utf8.RUNE_EOF
}
}
t.offset += t.w
t.r, t.w = utf8.decode_rune(t.data[t.offset:])
t.pos.column = t.offset - t.curr_line_offset
return t.r
}
@@ -120,18 +125,21 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) {
return false
}
skip_whitespace :: proc(t: ^Tokenizer) -> rune {
skip_whitespace :: proc(t: ^Tokenizer, on_newline: bool) -> rune {
loop: for t.offset < len(t.data) {
switch t.r {
case ' ', '\t', '\v', '\f', '\r':
next_rune(t)
case '\n':
if on_newline {
break loop
}
t.line += 1
t.curr_line_offset = t.offset
t.pos.column = 1
next_rune(t)
case:
if t.spec == .JSON5 {
if t.spec != .JSON {
switch t.r {
case 0x2028, 0x2029, 0xFEFF:
next_rune(t)
@@ -164,7 +172,7 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) {
}
}
skip_whitespace(t)
skip_whitespace(t, t.insert_comma)
token.pos = t.pos
@@ -179,6 +187,12 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) {
case utf8.RUNE_EOF, '\x00':
token.kind = .EOF
err = .EOF
case '\n':
t.insert_comma = false
token.text = ","
token.kind = .Comma
return
case 'A'..='Z', 'a'..='z', '_':
token.kind = .Ident
@@ -190,7 +204,7 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) {
case "false": token.kind = .False
case "true": token.kind = .True
case:
if t.spec == .JSON5 {
if t.spec != .JSON {
switch str {
case "Infinity": token.kind = .Infinity
case "NaN": token.kind = .NaN
@@ -200,7 +214,7 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) {
case '+':
err = .Illegal_Character
if t.spec != .JSON5 {
if t.spec == .JSON {
break
}
fallthrough
@@ -213,7 +227,7 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) {
// Illegal use of +/-
err = .Illegal_Character
if t.spec == .JSON5 {
if t.spec != .JSON {
if t.r == 'I' || t.r == 'N' {
skip_alphanum(t)
}
@@ -228,7 +242,7 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) {
case '0'..='9':
token.kind = t.parse_integers ? .Integer : .Float
if t.spec == .JSON5 { // Hexadecimal Numbers
if t.spec != .JSON { // Hexadecimal Numbers
if curr_rune == '0' && (t.r == 'x' || t.r == 'X') {
next_rune(t)
skip_hex_digits(t)
@@ -258,7 +272,7 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) {
case '.':
err = .Illegal_Character
if t.spec == .JSON5 { // Allow leading decimal point
if t.spec != .JSON { // Allow leading decimal point
skip_digits(t)
if t.r == 'e' || t.r == 'E' {
switch r := next_rune(t); r {
@@ -276,7 +290,7 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) {
case '\'':
err = .Illegal_Character
if t.spec != .JSON5 {
if t.spec == .JSON {
break
}
fallthrough
@@ -304,16 +318,25 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) {
}
case ',': token.kind = .Comma
case ',':
token.kind = .Comma
t.insert_comma = false
case ':': token.kind = .Colon
case '{': token.kind = .Open_Brace
case '}': token.kind = .Close_Brace
case '[': token.kind = .Open_Bracket
case ']': token.kind = .Close_Bracket
case '=':
if t.spec == .MJSON {
token.kind = .Colon
} else {
err = .Illegal_Character
}
case '/':
err = .Illegal_Character
if t.spec == .JSON5 {
if t.spec != .JSON {
switch t.r {
case '/':
// Single-line comments
@@ -339,6 +362,21 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) {
}
token.text = string(t.data[token.offset : t.offset])
if t.spec == .MJSON {
switch token.kind {
case .Invalid:
// preserve insert_comma info
case .EOF:
t.insert_comma = false
case .Colon, .Comma, .Open_Brace, .Open_Bracket:
t.insert_comma = false
case .Null, .False, .True, .Infinity, .NaN,
.Ident, .Integer, .Float, .String,
.Close_Brace, .Close_Bracket:
t.insert_comma = true
}
}
return
}
@@ -356,7 +394,7 @@ is_valid_number :: proc(str: string, spec: Specification) -> bool {
if s == "" {
return false
}
} else if spec == .JSON5 {
} else if spec != .JSON {
if s[0] == '+' { // Allow positive sign
s = s[1:]
if s == "" {
@@ -374,7 +412,7 @@ is_valid_number :: proc(str: string, spec: Specification) -> bool {
s = s[1:]
}
case '.':
if spec == .JSON5 { // Allow leading decimal point
if spec != .JSON { // Allow leading decimal point
s = s[1:]
} else {
return false
@@ -383,7 +421,7 @@ is_valid_number :: proc(str: string, spec: Specification) -> bool {
return false
}
if spec == .JSON5 {
if spec != .JSON {
if len(s) == 1 && s[0] == '.' { // Allow trailing decimal point
return true
}
@@ -424,7 +462,7 @@ is_valid_string_literal :: proc(str: string, spec: Specification) -> bool {
return false
}
if s[0] != '"' || s[len(s)-1] != '"' {
if spec == .JSON5 {
if spec != .JSON {
if s[0] != '\'' || s[len(s)-1] != '\'' {
return false
}

View File

@@ -3,7 +3,7 @@ package json
Specification :: enum {
JSON,
JSON5, // https://json5.org/
// MJSON, // http://bitsquid.blogspot.com/2009/09/json-configuration-data.html
MJSON, // https://bitsquid.blogspot.com/2009/10/simplified-json-notation.html
}
DEFAULT_SPECIFICATION :: Specification.JSON5

View File

@@ -1,5 +1,6 @@
package json
import "core:fmt"
import "core:mem"
import "core:math"
import "core:reflect"
@@ -48,6 +49,14 @@ unmarshal_any :: proc(data: []byte, v: any, spec := DEFAULT_SPECIFICATION, alloc
}
context.allocator = p.allocator
if p.spec == .MJSON {
#partial switch p.curr_token.kind {
case .Ident, .String:
return unmarsal_object(&p, data, .EOF)
}
}
return unmarsal_value(&p, data)
}
@@ -244,13 +253,13 @@ unmarsal_value :: proc(p: ^Parser, v: any) -> (err: Unmarshal_Error) {
case .Open_Brace:
return unmarsal_object(p, v)
return unmarsal_object(p, v, .Close_Brace)
case .Open_Bracket:
return unmarsal_array(p, v)
case:
if p.spec == Specification.JSON5 {
if p.spec != .JSON {
#partial switch token.kind {
case .Infinity:
advance_token(p)
@@ -285,16 +294,18 @@ unmarsal_value :: proc(p: ^Parser, v: any) -> (err: Unmarshal_Error) {
unmarsal_expect_token :: proc(p: ^Parser, kind: Token_Kind, loc := #caller_location) -> Token {
prev := p.curr_token
err := expect_token(p, kind)
assert(err == nil, "unmarsal_expect_token", loc)
fmt.assertf(condition = err == nil, fmt="unmarsal_expect_token: %v, got %v", args={kind, prev.kind}, loc=loc)
return prev
}
@(private)
unmarsal_object :: proc(p: ^Parser, v: any) -> (err: Unmarshal_Error) {
unmarsal_object :: proc(p: ^Parser, v: any, end_token: Token_Kind) -> (err: Unmarshal_Error) {
UNSUPPORTED_TYPE := Unsupported_Type_Error{v.id, p.curr_token}
assert(expect_token(p, .Open_Brace) == nil)
if end_token == .Close_Brace {
assert(expect_token(p, .Open_Brace) == nil)
}
v := v
v = reflect.any_base(v)
@@ -306,7 +317,7 @@ unmarsal_object :: proc(p: ^Parser, v: any) -> (err: Unmarshal_Error) {
return UNSUPPORTED_TYPE
}
struct_loop: for p.curr_token.kind != .Close_Brace {
struct_loop: for p.curr_token.kind != end_token {
key, _ := parse_object_key(p, p.allocator)
defer delete(key, p.allocator)
@@ -347,21 +358,10 @@ unmarsal_object :: proc(p: ^Parser, v: any) -> (err: Unmarshal_Error) {
field_ptr := rawptr(uintptr(v.data) + offset)
field := any{field_ptr, type.id}
unmarsal_value(p, field) or_return
if p.spec == Specification.JSON5 {
// Allow trailing commas
if allow_token(p, .Comma) {
continue struct_loop
}
} else {
// Disallow trailing commas
if allow_token(p, .Comma) {
continue struct_loop
} else {
break struct_loop
}
if parse_comma(p) {
break struct_loop
}
continue struct_loop
}
@@ -384,7 +384,7 @@ unmarsal_object :: proc(p: ^Parser, v: any) -> (err: Unmarshal_Error) {
map_backing_value := any{raw_data(elem_backing), t.value.id}
map_loop: for p.curr_token.kind != .Close_Brace {
map_loop: for p.curr_token.kind != end_token {
key, _ := parse_object_key(p, p.allocator)
unmarsal_expect_token(p, .Colon)
@@ -410,19 +410,9 @@ unmarsal_object :: proc(p: ^Parser, v: any) -> (err: Unmarshal_Error) {
if set_ptr == nil {
delete(key, p.allocator)
}
if p.spec == Specification.JSON5 {
// Allow trailing commas
if allow_token(p, .Comma) {
continue map_loop
}
} else {
// Disallow trailing commas
if allow_token(p, .Comma) {
continue map_loop
} else {
break map_loop
}
if parse_comma(p) {
break map_loop
}
}
@@ -430,7 +420,7 @@ unmarsal_object :: proc(p: ^Parser, v: any) -> (err: Unmarshal_Error) {
index_type := reflect.type_info_base(t.index)
enum_type := index_type.variant.(reflect.Type_Info_Enum)
enumerated_array_loop: for p.curr_token.kind != .Close_Brace {
enumerated_array_loop: for p.curr_token.kind != end_token {
key, _ := parse_object_key(p, p.allocator)
unmarsal_expect_token(p, .Colon)
defer delete(key, p.allocator)
@@ -450,19 +440,9 @@ unmarsal_object :: proc(p: ^Parser, v: any) -> (err: Unmarshal_Error) {
index_any := any{index_ptr, t.elem.id}
unmarsal_value(p, index_any) or_return
if p.spec == Specification.JSON5 {
// Allow trailing commas
if allow_token(p, .Comma) {
continue enumerated_array_loop
}
} else {
// Disallow trailing commas
if allow_token(p, .Comma) {
continue enumerated_array_loop
} else {
break enumerated_array_loop
}
if parse_comma(p) {
break enumerated_array_loop
}
}
@@ -472,7 +452,9 @@ unmarsal_object :: proc(p: ^Parser, v: any) -> (err: Unmarshal_Error) {
return UNSUPPORTED_TYPE
}
assert(expect_token(p, .Close_Brace) == nil)
if end_token == .Close_Brace {
assert(expect_token(p, .Close_Brace) == nil)
}
return
}
@@ -485,10 +467,8 @@ unmarsal_count_array :: proc(p: ^Parser) -> (length: uintptr) {
array_length_loop: for p.curr_token.kind != .Close_Bracket {
_, _ = parse_value(p)
length += 1
if allow_token(p, .Comma) {
continue
} else {
if parse_comma(p) {
break
}
}
@@ -509,9 +489,7 @@ unmarsal_array :: proc(p: ^Parser, v: any) -> (err: Unmarshal_Error) {
unmarsal_value(p, elem) or_return
if allow_token(p, .Comma) {
continue
} else {
if parse_comma(p) {
break
}
}

View File

@@ -5,66 +5,64 @@ import "core:mem"
// NOTE(bill): is_valid will not check for duplicate keys
is_valid :: proc(data: []byte, spec := DEFAULT_SPECIFICATION, parse_integers := false) -> bool {
p := make_parser(data, spec, parse_integers, mem.nil_allocator())
if p.spec == Specification.JSON5 {
switch p.spec {
case .JSON:
return validate_object(&p)
case .JSON5:
return validate_value(&p)
case .MJSON:
#partial switch p.curr_token.kind {
case .Ident, .String:
return validate_object_body(&p, .EOF)
}
return validate_value(&p)
}
return validate_object(&p)
}
validate_object_key :: proc(p: ^Parser) -> bool {
tok := p.curr_token
if p.spec == Specification.JSON5 {
if tok.kind == .String {
expect_token(p, .String)
return true
} else if tok.kind == .Ident {
expect_token(p, .Ident)
if p.spec != .JSON {
if allow_token(p, .Ident) {
return true
}
}
err := expect_token(p, .String)
return err == Error.None
return err == .None
}
validate_object :: proc(p: ^Parser) -> bool {
if err := expect_token(p, .Open_Brace); err != Error.None {
return false
}
for p.curr_token.kind != .Close_Brace {
validate_object_body :: proc(p: ^Parser, end_token: Token_Kind) -> bool {
for p.curr_token.kind != end_token {
if !validate_object_key(p) {
return false
}
if colon_err := expect_token(p, .Colon); colon_err != Error.None {
if parse_colon(p) != nil {
return false
}
validate_value(p) or_return
if !validate_value(p) {
return false
}
if p.spec == Specification.JSON5 {
// Allow trailing commas
if allow_token(p, .Comma) {
continue
}
} else {
// Disallow trailing commas
if allow_token(p, .Comma) {
continue
} else {
break
}
if parse_comma(p) {
break
}
}
return true
}
if err := expect_token(p, .Close_Brace); err != Error.None {
// Validate a braced object: '{', the key/value body, then '}'.
// Returns false as soon as any piece fails to parse.
validate_object :: proc(p: ^Parser) -> bool {
	if expect_token(p, .Open_Brace) != .None {
		return false
	}
	if !validate_object_body(p, .Close_Brace) {
		return false
	}
	return expect_token(p, .Close_Brace) == .None
}
validate_array :: proc(p: ^Parser) -> bool {
if err := expect_token(p, .Open_Bracket); err != Error.None {
if err := expect_token(p, .Open_Bracket); err != .None {
return false
}
@@ -73,15 +71,12 @@ validate_array :: proc(p: ^Parser) -> bool {
return false
}
// Disallow trailing commas for the time being
if allow_token(p, .Comma) {
continue
} else {
if parse_comma(p) {
break
}
}
if err := expect_token(p, .Close_Bracket); err != Error.None {
if err := expect_token(p, .Close_Bracket); err != .None {
return false
}
@@ -109,7 +104,7 @@ validate_value :: proc(p: ^Parser) -> bool {
return validate_array(p)
case:
if p.spec == Specification.JSON5 {
if p.spec != .JSON {
#partial switch token.kind {
case .Infinity, .NaN:
advance_token(p)