Begin work on package json

This commit is contained in:
gingerBill
2019-01-06 20:37:12 +00:00
parent 6e6a053823
commit c5def60224
3 changed files with 715 additions and 0 deletions

View File

@@ -0,0 +1,323 @@
package json
import "core:mem"
import "core:unicode/utf8"
import "core:strconv"
import "core:strings"
// Parser consumes tokens from an embedded Tokenizer with a single token
// of lookahead (`curr_token`); every parsed Array/Object/String is
// allocated from `allocator`.
Parser :: struct {
	tok: Tokenizer,           // underlying scanner over the input data
	curr_token: Token,        // one-token lookahead, primed by make_parser
	allocator: mem.Allocator, // owns all memory of the resulting Value tree
}
// make_parser initialises a Parser over `data`, records the allocator
// used for all parsed values, and primes the one-token lookahead so
// `curr_token` is valid before the first parse call.
make_parser :: proc(data: string, allocator := context.allocator) -> Parser {
	parser := Parser{
		tok = make_tokenizer(data),
		allocator = allocator,
	};
	assert(parser.allocator.procedure != nil);
	advance_token(&parser);
	return parser;
}
// parse tokenizes `data` and parses it into a Value tree allocated from
// `allocator`.
// NOTE(review): the root is parsed with parse_object, so only an object
// (`{...}`) is accepted as the document root — a bare array, string, or
// number at the top level yields Error.Unexpected_Token. Confirm whether
// arbitrary top-level values should eventually be allowed.
parse :: proc(data: string, allocator := context.allocator) -> (Value, Error) {
	p := make_parser(data, allocator);
	return parse_object(&p);
}
// advance_token steps the parser forward by one token. It returns the
// token that was current before the step, plus any error produced while
// scanning the replacement token.
advance_token :: proc(p: ^Parser) -> (Token, Error) {
	previous := p.curr_token;
	tok, tok_err := get_token(&p.tok);
	p.curr_token = tok;
	return previous, tok_err;
}
// allow_token conditionally consumes the current token: when it is of
// `kind` the parser advances and true is returned; otherwise the parser
// is left untouched and false is returned.
allow_token :: proc(p: ^Parser, kind: Kind) -> bool {
	if p.curr_token.kind != kind {
		return false;
	}
	advance_token(p);
	return true;
}
// expect_token unconditionally consumes one token and reports whether it
// was of the required `kind`: Error.None on a match,
// Error.Unexpected_Token otherwise.
expect_token :: proc(p: ^Parser, kind: Kind) -> Error {
	consumed := p.curr_token;
	advance_token(p);
	if consumed.kind != kind {
		return Error.Unexpected_Token;
	}
	return Error.None;
}
// parse_value parses a single JSON value starting at the current token:
// the null/true/false literals, integers, floats, strings, objects, and
// arrays. The value's position is taken from the token that starts it.
// Any other token kind (Invalid, Ident, stray punctuation) produces
// Error.Unexpected_Token, and the offending token is still consumed.
parse_value :: proc(p: ^Parser) -> (value: Value, err: Error) {
	value.pos = p.curr_token.pos;
	token := p.curr_token;
	switch token.kind {
	case Kind.Null:
		value.value = Null{};
		advance_token(p);
		return;
	case Kind.False:
		value.value = Boolean(false);
		advance_token(p);
		return;
	case Kind.True:
		value.value = Boolean(true);
		advance_token(p);
		return;
	case Kind.Integer:
		// NOTE(review): any conversion failure from strconv is silently
		// dropped here; the tokenizer's is_valid_number check is the only
		// validation the numeric text receives.
		value.value = Integer(strconv.parse_i64(token.text));
		advance_token(p);
		return;
	case Kind.Float:
		value.value = Float(strconv.parse_f64(token.text));
		advance_token(p);
		return;
	case Kind.String:
		// token.text still carries its quotes and escape sequences;
		// unquote_string allocates the decoded copy from p.allocator.
		value.value = String(unquote_string(token, p.allocator));
		advance_token(p);
		return;
	case Kind.Open_Brace:
		return parse_object(p);
	case Kind.Open_Bracket:
		return parse_array(p);
	}
	err = Error.Unexpected_Token;
	advance_token(p);
	return;
}
// parse_array parses `[ value (, value)* ]` into an Array allocated from
// the parser's allocator. Trailing commas (`[1,]`) are rejected with
// Error.Unexpected_Token. On any error, all elements built so far are
// destroyed before returning.
parse_array :: proc(p: ^Parser) -> (value: Value, err: Error) {
	value.pos = p.curr_token.pos;
	if err = expect_token(p, Kind.Open_Bracket); err != Error.None {
		return;
	}
	array: Array;
	array.allocator = p.allocator;
	// Release everything parsed so far if we bail out with an error.
	defer if err != Error.None {
		for elem in array {
			destroy_value(elem);
		}
		delete(array);
	}
	for p.curr_token.kind != Kind.Close_Bracket {
		elem, elem_err := parse_value(p);
		if elem_err != Error.None {
			err = elem_err;
			return;
		}
		append(&array, elem);
		if !allow_token(p, Kind.Comma) {
			break;
		}
		// BUG FIX: a comma must introduce another element. Previously
		// `[1,]` was accepted because the loop condition simply exited on
		// `]` after the comma, contradicting the "disallow trailing
		// commas" intent and the JSON grammar.
		if p.curr_token.kind == Kind.Close_Bracket {
			err = Error.Unexpected_Token;
			return;
		}
	}
	if err = expect_token(p, Kind.Close_Bracket); err != Error.None {
		return;
	}
	value.value = array;
	return;
}
// parse_object parses `{ "key": value (, "key": value)* }` into an
// Object allocated from the parser's allocator. Keys must be string
// tokens and must be unique; trailing commas (`{"a":1,}`) are rejected.
// On any error, all keys and values built so far are destroyed before
// returning.
parse_object :: proc(p: ^Parser) -> (value: Value, err: Error) {
	value.pos = p.curr_token.pos;
	if err = expect_token(p, Kind.Open_Brace); err != Error.None {
		value.pos = p.curr_token.pos;
		return;
	}
	obj: Object;
	obj.allocator = p.allocator;
	// Release every stored key/value pair if we bail out with an error.
	defer if err != Error.None {
		for key, elem in obj {
			delete(key);
			destroy_value(elem);
		}
		delete(obj);
	}
	for p.curr_token.kind != Kind.Close_Brace {
		tok := p.curr_token;
		if tok_err := expect_token(p, Kind.String); tok_err != Error.None {
			err = Error.Expected_String_For_Object_Key;
			value.pos = p.curr_token.pos;
			return;
		}
		key := unquote_string(tok, p.allocator);
		if colon_err := expect_token(p, Kind.Colon); colon_err != Error.None {
			err = Error.Expected_Colon_After_Key;
			value.pos = p.curr_token.pos;
			delete(key); // BUG FIX: key is not owned by obj yet; it leaked here
			return;
		}
		elem, elem_err := parse_value(p);
		if elem_err != Error.None {
			err = elem_err;
			value.pos = p.curr_token.pos;
			delete(key); // BUG FIX: same leak as the colon-error path
			return;
		}
		if key in obj {
			err = Error.Duplicate_Object_Key;
			value.pos = p.curr_token.pos;
			delete(key);
			destroy_value(elem); // BUG FIX: the duplicate's value also leaked
			return;
		}
		obj[key] = elem;
		if !allow_token(p, Kind.Comma) {
			break;
		}
		// BUG FIX: a comma must introduce another pair. Previously
		// `{"a":1,}` was accepted, contradicting the "disallow trailing
		// commas" intent and the JSON grammar.
		if p.curr_token.kind == Kind.Close_Brace {
			err = Error.Unexpected_Token;
			value.pos = p.curr_token.pos;
			return;
		}
	}
	if err = expect_token(p, Kind.Close_Brace); err != Error.None {
		value.pos = p.curr_token.pos;
		return;
	}
	value.value = obj;
	return;
}
// IMPORTANT NOTE(bill): unquote_string assumes a mostly valid string
//
// unquote_string strips the surrounding quotes from a String token and
// resolves its escape sequences (\" \' \\ \/ \b \f \n \r \t and \uXXXX),
// returning a newly allocated string from `allocator`. An invalid escape
// or stray quote/control character terminates decoding early, returning
// the prefix decoded up to that point. Non-String tokens and literals of
// two bytes or fewer yield "".
unquote_string :: proc(token: Token, allocator := context.allocator) -> string {
	// get_u4_rune decodes a `\uXXXX` escape at the start of `s`,
	// returning the rune value, or -1 when the escape is malformed.
	get_u4_rune :: proc(s: string) -> rune {
		if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
			return -1;
		}
		r: rune;
		for c in s[2:6] {
			x: rune;
			switch c {
			case '0'..'9': x = c - '0';
			case 'a'..'f': x = c - 'a' + 10;
			case 'A'..'F': x = c - 'A' + 10;
			case: return -1;
			}
			r = r*16 + x;
		}
		return r;
	}
	if token.kind != Kind.String {
		return "";
	}
	s := token.text;
	if len(s) <= 2 {
		return "";
	}
	s = s[1:len(s)-1]; // drop the surrounding quotes
	// Fast path: scan for the first byte that needs work (escape, quote,
	// control character, or a non-ASCII sequence needing validation).
	i := 0;
	for i < len(s) {
		c := s[i];
		if c == '\\' || c == '"' || c < ' ' {
			break;
		}
		if c < utf8.RUNE_SELF {
			i += 1;
			continue;
		}
		// BUG FIX: decode at the current position, not the start of the
		// string. The original decoded `s` (always byte 0), so invalid
		// UTF-8 later in the string was never detected here and the fast
		// path copied it through unvalidated.
		r, w := utf8.decode_rune_in_string(s[i:]);
		if r == utf8.RUNE_ERROR && w == 1 {
			break;
		}
		i += w;
	}
	if i == len(s) {
		// Nothing needed rewriting; a straight copy suffices.
		return strings.new_string(s, allocator);
	}
	// Slow path: copy the clean prefix, then decode the remainder.
	// Every escape encodes to at most utf8.UTF_MAX bytes from at least
	// two source bytes, so len(s) plus slack is always enough room.
	b := make([]byte, len(s) + 2*utf8.UTF_MAX, allocator);
	w := copy(b, cast([]byte)s[0:i]);
	loop: for i < len(s) {
		c := s[i];
		switch {
		case c == '\\':
			i += 1;
			if i >= len(s) {
				break loop; // dangling backslash at end of literal
			}
			switch s[i] {
			case: break loop; // unknown escape: stop decoding
			case '"', '\'', '\\', '/':
				b[w] = s[i];
				i += 1;
				w += 1;
			case 'b':
				b[w] = '\b';
				i += 1;
				w += 1;
			case 'f':
				b[w] = '\f';
				i += 1;
				w += 1;
			case 'r':
				b[w] = '\r';
				i += 1;
				w += 1;
			case 't':
				b[w] = '\t';
				i += 1;
				w += 1;
			case 'n':
				b[w] = '\n';
				i += 1;
				w += 1;
			case 'u':
				i -= 1; // Include the \u in the check for sanity sake
				r := get_u4_rune(s[i:]);
				if r < 0 {
					break loop;
				}
				i += 6;
				// NOTE(review): UTF-16 surrogate pairs (two consecutive
				// \uD800-\uDFFF escapes) are not combined into a single
				// rune here — TODO confirm whether they should be.
				buf, buf_width := utf8.encode_rune(r);
				copy(b[w:], buf[:buf_width]);
				w += buf_width;
			}
		case c == '"', c < ' ':
			break loop; // unescaped quote or control character
		case c < utf8.RUNE_SELF:
			b[w] = c;
			i += 1;
			w += 1;
		case:
			// Multi-byte rune: re-encode (possibly as RUNE_ERROR) so the
			// output is always valid UTF-8.
			r, width := utf8.decode_rune_in_string(s[i:]);
			i += width;
			buf, buf_width := utf8.encode_rune(r);
			assert(buf_width <= width);
			copy(b[w:], buf[:buf_width]);
			w += buf_width;
		}
	}
	return string(b[:w]);
}

View File

@@ -0,0 +1,322 @@
package json
import "core:unicode/utf8"
// Token is a single lexical element: its position in the source, its
// classification, and the exact slice of the input it spans.
Token :: struct {
	using pos: Pos,
	kind: Kind,
	text: string, // slice of the tokenizer's input, not a copy; strings keep their quotes/escapes
}
// Kind identifies the lexical class of a Token.
Kind :: enum {
	Invalid,       // the tokenizer could not classify the input

	Null,          // `null`
	False,         // `false`
	True,          // `true`
	Ident,         // any other bare identifier (rejected by the parser)
	Integer,       // number with no fraction or exponent
	Float,         // number containing a fraction and/or exponent
	String,        // double-quoted string literal (raw, still escaped)

	Colon,         // `:`
	Comma,         // `,`
	Open_Brace,    // `{`
	Close_Brace,   // `}`
	Open_Bracket,  // `[`
	Close_Bracket, // `]`
}
// Tokenizer is a single-pass scanner over an in-memory JSON document.
// It keeps one rune of lookahead decoded in `r`/`w`.
Tokenizer :: struct {
	using pos: Pos,   // offset/line/column of the current position
	data: string,     // the entire input being scanned
	r: rune, // current rune
	w: int, // current rune width in bytes
	curr_line_offset: int, // byte offset at which the current line began
}
// make_tokenizer sets up a Tokenizer over `data`, starting at line 1,
// and pre-decodes the first rune so `t.r` is valid immediately.
make_tokenizer :: proc(data: string) -> Tokenizer {
	tokenizer: Tokenizer;
	tokenizer.line = 1;
	tokenizer.data = data;
	next_rune(&tokenizer);
	return tokenizer;
}
// next_rune advances the tokenizer past the current rune (by `t.w`
// bytes) and decodes the rune that follows into `t.r`/`t.w`, updating
// the column from the current line's start offset. Returns the
// newly-decoded rune, or RUNE_EOF once `offset` has reached the end.
// NOTE(review): when the end is reached, `t.r`/`t.w` are NOT reset —
// they keep their previous values, and an advance that lands exactly at
// the end decodes an empty slice. Callers appear to guard with
// `t.offset < len(t.data)` instead of trusting `t.r`; confirm before
// relying on `t.r` after EOF.
next_rune :: proc(t: ^Tokenizer) -> rune #no_bounds_check {
	if t.offset >= len(t.data) {
		return utf8.RUNE_EOF;
	}
	t.offset += t.w;
	t.r, t.w = utf8.decode_rune_in_string(t.data[t.offset:]);
	t.pos.column = t.offset - t.curr_line_offset;
	return t.r;
}
// get_token scans and returns the next token from the input: JSON
// keywords (null/false/true), bare identifiers, numbers, strings, and
// punctuation. Whitespace is skipped first; token.text always covers the
// exact byte range [token.offset, t.offset). Returns Error.EOF at end of
// input (not necessarily a failure).
get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) {
	// Consumes runes for as long as the rune following the current one is
	// a decimal digit; stops with t.r on the first non-digit.
	skip_digits :: proc(t: ^Tokenizer) {
		for t.offset < len(t.data) {
			next_rune(t);
			if '0' <= t.r && t.r <= '9' {
				// Okay
			} else {
				return;
			}
		}
	}
	// NOTE(review): name is a typo for "scan_escape". Scans the escape
	// character(s) following a backslash. Also note that a fully valid
	// \uXXXX escape falls out of the switch and hits `return false`; the
	// caller below ignores the return value, so this is currently
	// harmless — but fix before anyone starts checking it.
	scan_espace :: proc(t: ^Tokenizer) -> bool {
		switch t.r {
		case '"', '\'', '\\', '/', 'b', 'n', 'r', 't', 'f':
			next_rune(t);
			return true;
		case 'u':
			// Expect 4 hexadecimal digits
			for i := 0; i < 4; i += 1 {
				r := next_rune(t);
				switch r {
				case '0'..'9', 'a'..'f', 'A'..'F':
					// Okay
				case:
					return false;
				}
			}
		case:
			// Ignore the next rune regardless
			next_rune(t);
		}
		return false;
	}
	// Advances past spaces, tabs, and newlines, keeping the line counter
	// and line-start offset up to date for column computation.
	skip_whitespace :: proc(t: ^Tokenizer) -> rune {
		loop: for t.offset < len(t.data) {
			switch t.r {
			case ' ', '\t', '\v', '\f', '\r':
				next_rune(t);
			case '\n':
				t.line += 1;
				t.curr_line_offset = t.offset;
				t.pos.column = 1;
				next_rune(t);
			case:
				break loop;
			}
		}
		return t.r;
	}
	skip_whitespace(t);
	token.pos = t.pos;
	token.kind = Kind.Invalid;
	curr_rune := t.r;
	next_rune(t);
	switch curr_rune {
	case utf8.RUNE_ERROR:
		err = Error.Illegal_Character;
	case utf8.RUNE_EOF, '\x00':
		err = Error.EOF;
	case 'A'..'Z', 'a'..'z', '_':
		// Identifier: scanned greedily, then re-classified when it is one
		// of the JSON keywords; anything else stays Kind.Ident.
		token.kind = Kind.Ident;
		for t.offset < len(t.data) {
			switch next_rune(t) {
			case 'A'..'Z', 'a'..'z', '0'..'9', '_':
				continue;
			}
			break;
		}
		switch str := t.data[token.offset:t.offset]; str {
		case "null": token.kind = Kind.Null;
		case "false": token.kind = Kind.False;
		case "true": token.kind = Kind.True;
		}
	case '-':
		switch t.r {
		case '0'..'9':
			// Okay
		case:
			// Illegal use of +/-
			err = Error.Illegal_Character;
			break;
		}
		// NOTE(review): even when err was set just above, the `break`
		// only exits the inner switch and execution still falls through
		// to the digit case, so the rest of the number is consumed and
		// token.text stays consistent with what was scanned.
		fallthrough;
	case '0'..'9':
		// Number: integer part, optional fraction, optional exponent.
		token.kind = Kind.Integer;
		skip_digits(t);
		if t.r == '.' {
			token.kind = Kind.Float;
			next_rune(t);
			skip_digits(t);
		}
		if t.r == 'e' || t.r == 'E' {
			switch r := next_rune(t); r {
			case '+', '-':
				next_rune(t);
			}
			skip_digits(t);
		}
		// Re-validate the scanned text against the strict JSON grammar.
		str := t.data[token.offset:t.offset];
		if !is_valid_number(str) {
			err = Error.Invalid_Number;
		}
	case '"':
		token.kind = Kind.String;
		quote := curr_rune;
		for t.offset < len(t.data) {
			r := t.r;
			if r == '\n' || r < 0 {
				err = Error.String_Not_Terminated;
				break;
			}
			next_rune(t);
			if r == quote {
				break;
			}
			if r == '\\' {
				scan_espace(t);
			}
		}
		// Re-validate the whole literal (quotes, escapes, UTF-8).
		if !is_valid_string_literal(t.data[token.offset : t.offset]) {
			err = Error.Invalid_String;
		}
	case ',': token.kind = Kind.Comma;
	case ':': token.kind = Kind.Colon;
	case '{': token.kind = Kind.Open_Brace;
	case '}': token.kind = Kind.Close_Brace;
	case '[': token.kind = Kind.Open_Bracket;
	case ']': token.kind = Kind.Close_Bracket;
	case: err = Error.Illegal_Character;
	}
	token.text = t.data[token.offset : t.offset];
	return;
}
// is_valid_number reports whether `s` exactly matches the JSON number
// grammar: an optional minus sign, an integer part with no leading
// zeros, an optional fraction (`.` plus at least one digit), and an
// optional exponent (`e`/`E`, optional sign, digits).
is_valid_number :: proc(s: string) -> bool {
	i := 0;
	n := len(s);
	if n == 0 {
		return false;
	}
	// Optional leading minus; a lone '-' is invalid.
	if s[i] == '-' {
		i += 1;
		if i == n {
			return false;
		}
	}
	// Integer part: either a single '0', or a non-zero digit followed by
	// any number of digits (leading zeros are forbidden).
	switch s[i] {
	case '0':
		i += 1;
	case '1'..'9':
		i += 1;
		for i < n && '0' <= s[i] && s[i] <= '9' do i += 1;
	case:
		return false;
	}
	// Optional fraction: '.' must be directly followed by a digit.
	if i+1 < n && s[i] == '.' && '0' <= s[i+1] && s[i+1] <= '9' {
		i += 2;
		for i < n && '0' <= s[i] && s[i] <= '9' do i += 1;
	}
	// Optional exponent: 'e'/'E' with something after it, then an
	// optional sign which must itself be followed by digits.
	if i+1 < n && (s[i] == 'e' || s[i] == 'E') {
		i += 1;
		switch s[i] {
		case '+', '-':
			i += 1;
			if i == n {
				return false;
			}
		}
		for i < n && '0' <= s[i] && s[i] <= '9' do i += 1;
	}
	// Valid only when the entire string has been consumed.
	return i == n;
}
// is_valid_string_literal reports whether `s` is a well-formed,
// double-quoted JSON string literal, including the surrounding quotes.
// It validates escape sequences and UTF-8 encoding but does not decode;
// unescaped quotes and control characters are rejected.
is_valid_string_literal :: proc(s: string) -> bool {
	if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' {
		return false;
	}
	s = s[1 : len(s)-1];
	i := 0;
	for i < len(s) {
		c := s[i];
		switch {
		case c == '\\':
			i += 1;
			if i >= len(s) {
				return false; // dangling backslash at end of literal
			}
			switch s[i] {
			case '"', '\'', '\\', '/', 'b', 'n', 'r', 't', 'f':
				i += 1;
			case 'u':
				// \u must be followed by exactly 4 hexadecimal digits.
				hex := s[i+1:];
				if len(hex) < 4 {
					return false;
				}
				hex = hex[:4];
				i += 5;
				for j := 0; j < 4; j += 1 {
					// BUG FIX: the hex check previously accepted
					// 'a'..'z' and 'A'..'Z', so escapes like \uZZZZ
					// validated; only a-f/A-F are hexadecimal.
					switch hex[j] {
					case '0'..'9', 'a'..'f', 'A'..'F':
						// Okay
					case:
						return false;
					}
				}
			case: return false;
			}
		case c == '"', c < ' ':
			return false; // unescaped quote or control character
		case c < utf8.RUNE_SELF:
			i += 1;
		case:
			r, width := utf8.decode_rune_in_string(s[i:]);
			if r == utf8.RUNE_ERROR && width == 1 {
				return false;
			}
			i += width;
		}
	}
	return true;
}

View File

@@ -0,0 +1,70 @@
package json
import "core:strconv"
// The concrete Odin types backing each JSON value variant.
Null :: distinct rawptr;             // only meaningful value is nil
Integer :: i64;                      // whole numbers (no fraction/exponent)
Float :: f64;                        // numbers with fraction or exponent
Boolean :: bool;
String :: string;                    // decoded (unescaped) string, heap-allocated
Array :: distinct [dynamic]Value;    // element order preserved
Object :: distinct map[string]Value; // keys are heap-allocated strings
// Value is a parsed JSON value: the source position where it started,
// plus a union over the possible variants. A nil union means "no value".
Value :: struct {
	pos: Pos,
	value: union {
		Null,
		Integer,
		Float,
		Boolean,
		String,
		Array,
		Object,
	}
}
// Pos locates a token or value within the source text.
Pos :: struct {
	offset: int, // byte offset from the start of the input
	line: int,   // 1-based line number
	column: int, // byte-based column within the current line
}
// Error enumerates everything that can go wrong while tokenizing or
// parsing. EOF is produced by the tokenizer at end of input and is not
// necessarily a failure.
Error :: enum {
	None,
	EOF, // Not necessarily an error

	// Tokenizing Errors
	Illegal_Character,
	Invalid_Number,
	String_Not_Terminated,
	Invalid_String,

	// Parsing Errors
	Unexpected_Token,
	Expected_String_For_Object_Key,
	Duplicate_Object_Key,
	Expected_Colon_After_Key,
}
// destroy_value recursively frees a parsed Value: object keys, object
// and array elements, and allocated strings. The Null, Boolean, and
// numeric variants own no memory, so they require no work.
destroy_value :: proc(value: Value) {
	switch variant in value.value {
	case Object:
		for key, element in variant {
			delete(key);
			destroy_value(element);
		}
		delete(variant);
	case Array:
		for element in variant {
			destroy_value(element);
		}
		delete(variant);
	case String:
		delete(variant);
	}
}