Remove package core:encoding/cel

This commit is contained in:
gingerBill
2021-08-08 12:50:38 +01:00
parent 5fd64f48ee
commit 48538aa792
3 changed files with 0 additions and 1426 deletions

View File

@@ -1,852 +0,0 @@
package cel;
import "core:fmt"
import "core:strconv"
import "core:unicode/utf8"
import "core:strings"
// Container aliases for the forms a CEL value may take.
Array :: []Value;
Dict  :: map[string]Value;

// Distinct marker for the literal `nil`, so it can be told apart from
// an unset Value union.
Nil_Value :: struct{};

// Any value produced by evaluating a CEL expression.
Value :: union {
	Nil_Value,
	bool, i64, f64, string,
	Array, Dict,
}
// Parser state: owns the token stream, the root dictionary of parsed
// assignments, and every string allocated while unquoting/concatenating.
Parser :: struct {
	tokens:            [dynamic]Token, // full token stream, produced up front
	prev_token:        Token,
	curr_token:        Token,
	curr_token_index:  int,
	allocated_strings: [dynamic]string, // owned strings, freed in destroy()
	error_count:       int,
	root:              Dict, // top-level assignments land here
	dict_stack:        [dynamic]^Dict, // NOTE: Pointers may be stored on the stack
}
// Print a Value to stdout. `pretty` enables newlines plus tab
// indentation; `indent` is the current nesting depth.
print_value :: proc(value: Value, pretty := true, indent := 0) {
	print_indent :: proc(indent: int) {
		for _ in 0..<indent {
			fmt.print("\t");
		}
	}
	switch v in value {
	case bool:   fmt.print(v);
	case i64:    fmt.print(v);
	case f64:    fmt.print(v);
	case string: fmt.print(v);
	case Array:
		fmt.print("[");
		if pretty { fmt.println(); }
		for e, i in v {
			if pretty {
				print_indent(indent+1);
				print_value(e, pretty, indent+1);
				fmt.println(",");
			} else {
				if i > 0 { fmt.print(", "); }
				print_value(e);
			}
		}
		if pretty { print_indent(indent); }
		fmt.print("]");
	case Dict:
		fmt.print("{");
		if pretty { fmt.println(); }
		i := 0;
		for name, val in v {
			if pretty {
				print_indent(indent+1);
				fmt.printf("%s = ", name);
				print_value(val, pretty, indent+1);
				fmt.println(",");
			} else {
				if i > 0 { fmt.print(", "); }
				fmt.printf("%s = ", name);
				print_value(val, pretty, indent+1);
				i += 1;
			}
		}
		if pretty { print_indent(indent); }
		fmt.print("}");
	case:
		// Unset union value prints the same as an explicit nil.
		fmt.print("nil");
	case Nil_Value:
		fmt.print("nil");
	}
}
// Print every top-level assignment of the parsed document to stdout,
// one `name = value;` per line.
print :: proc(p: ^Parser, pretty := false) {
	for name, val in p.root {
		fmt.printf("%s = ", name);
		print_value(val, pretty);
		fmt.println(";");
	}
}
// Build a parser from a string by reinterpreting it as a byte slice.
create_from_string :: proc(src: string) -> (^Parser, bool) {
	bytes := transmute([]byte)src;
	return init(bytes);
}
// Tokenize `src` and hand the resulting token stream to the parser.
init :: proc(src: []byte) -> (^Parser, bool) {
	tokenizer: Tokenizer;
	tokenizer_init(&tokenizer, src);
	return create_from_tokenizer(&tokenizer);
}
// Drain the tokenizer into a new Parser, then parse every top-level
// assignment into p.root. Returns false when tokenizing fails; parse
// errors are instead accumulated in p.error_count.
create_from_tokenizer :: proc(t: ^Tokenizer) -> (^Parser, bool) {
	p := new(Parser);
	// Tokenize the whole input up front.
	for {
		tok := scan(t);
		if tok.kind == .Illegal {
			return p, false;
		}
		append(&p.tokens, tok);
		if tok.kind == .EOF {
			break;
		}
	}
	if t.error_count > 0 {
		return p, false;
	}
	if len(p.tokens) == 0 {
		// NOTE(review): scan always emits an EOF token, so this path
		// looks unreachable; kept as a safety net.
		tok := Token{kind = .EOF};
		tok.line, tok.column = 1, 1;
		append(&p.tokens, tok);
		return p, true;
	}
	p.curr_token_index = 0;
	p.prev_token = p.tokens[p.curr_token_index];
	p.curr_token = p.tokens[p.curr_token_index];
	p.root = Dict{};
	p.dict_stack = make([dynamic]^Dict, 0, 4);
	append(&p.dict_stack, &p.root); // global scope sits at the bottom
	for p.curr_token.kind != .EOF &&
	    p.curr_token.kind != .Illegal &&
	    p.curr_token_index < len(p.tokens) {
		if !parse_assignment(p) {
			break;
		}
	}
	return p, true;
}
// Free everything owned by the parser: the token stream, strings
// allocated during parsing, the scope stack, and the value tree.
destroy :: proc(p: ^Parser) {
	// Recursively free containers; scalar variants own no memory and
	// strings are freed separately via p.allocated_strings.
	destroy_value :: proc(value: Value) {
		#partial switch v in value {
		case Array:
			for elem in v {
				destroy_value(elem);
			}
			delete(v);
		case Dict:
			for _, dv in v {
				destroy_value(dv);
			}
			delete(v);
		}
	}
	delete(p.tokens);
	for s in p.allocated_strings {
		delete(s);
	}
	delete(p.allocated_strings);
	delete(p.dict_stack);
	destroy_value(p.root);
	free(p);
}
// Report a parse error at `pos` (printf-style) to stderr and bump the
// parser's error count.
error :: proc(p: ^Parser, pos: Pos, msg: string, args: ..any) {
	fmt.eprintf("%s(%d:%d) Error: ", pos.file, pos.line, pos.column);
	fmt.eprintf(msg, ..args);
	fmt.eprintln();
	p.error_count += 1;
}
// Advance the cursor and return the token that was current. At the end
// of the stream the cursor is clamped to the final token and an error
// is reported.
next_token :: proc(p: ^Parser) -> Token {
	p.prev_token = p.curr_token;
	prev := p.prev_token;
	if p.curr_token_index+1 < len(p.tokens) {
		p.curr_token_index += 1;
		p.curr_token = p.tokens[p.curr_token_index];
		return prev;
	}
	p.curr_token_index = len(p.tokens);
	p.curr_token = p.tokens[p.curr_token_index-1];
	error(p, prev.pos, "Token is EOF");
	return prev;
}
// Decode the leading character (possibly an escape sequence) of `str`.
// Returns the decoded rune, whether it must be re-encoded as multiple
// UTF-8 bytes, the remaining tail of the string, and success.
// Modelled on Go's strconv.UnquoteChar.
unquote_char :: proc(str: string, quote: byte) -> (r: rune, multiple_bytes: bool, tail_string: string, success: bool) {
	hex_to_int :: proc(c: byte) -> int {
		switch c {
		case '0'..='9': return int(c-'0');
		case 'a'..='f': return int(c-'a')+10;
		case 'A'..='F': return int(c-'A')+10;
		}
		return -1; // not a hex digit
	}

	w: int;
	if str[0] == quote && quote == '"' {
		// A bare closing quote is not a character.
		return;
	} else if str[0] >= 0x80 {
		// Plain multi-byte UTF-8 sequence, no escape involved.
		r, w = utf8.decode_rune_in_string(str);
		return r, true, str[w:], true;
	} else if str[0] != '\\' {
		// Plain single-byte character.
		return rune(str[0]), false, str[1:], true;
	}

	// Escape sequence: need at least one character after the backslash.
	if len(str) <= 1 {
		return;
	}
	s := str;
	c := s[1];
	s = s[2:];

	switch c {
	case:
		return; // unknown escape
	case 'a':  r = '\a';
	case 'b':  r = '\b';
	case 'f':  r = '\f';
	case 'n':  r = '\n';
	case 'r':  r = '\r';
	case 't':  r = '\t';
	case 'v':  r = '\v';
	case '\\': r = '\\';
	case '"':  r = '"';
	case '\'': r = '\'';

	case '0'..='7':
		// Octal escape \nnn: exactly three octal digits, value <= 0xff.
		v := int(c-'0');
		if len(s) < 2 {
			return;
		}
		// BUG FIX: only the next two characters belong to the escape.
		// The original looped over the whole remaining string, which is
		// inconsistent with the `s = s[2:]` below.
		for i in 0..<2 {
			d := int(s[i]-'0');
			if d < 0 || d > 7 {
				return;
			}
			v = (v<<3) | d;
		}
		s = s[2:];
		if v > 0xff {
			return;
		}
		r = rune(v);

	case 'x', 'u', 'U':
		// Hex escapes: \xHH, \uHHHH, \UHHHHHHHH.
		count: int;
		switch c {
		case 'x': count = 2;
		case 'u': count = 4;
		case 'U': count = 8;
		}
		if len(s) < count {
			return;
		}
		for i in 0..<count {
			d := hex_to_int(s[i]);
			if d < 0 {
				return;
			}
			r = (r<<4) | rune(d);
		}
		s = s[count:];
		if c == 'x' {
			break; // \x stays a single byte
		}
		if r > utf8.MAX_RUNE {
			return;
		}
		multiple_bytes = true;
	}

	success = true;
	tail_string = s;
	return;
}
// Resolve the escape sequences in a string token's literal text.
// Non-string tokens are returned unchanged. Newly built strings are
// recorded in p.allocated_strings so destroy() can free them.
unquote_string :: proc(p: ^Parser, t: Token) -> (string, bool) {
	if t.kind != .String {
		return t.lit, true;
	}
	s := t.lit;
	quote := '"';
	if s == `""` {
		return "", true;
	}
	// NOTE(review): relies on strings.contains_rune returning an index
	// (-1 when absent), the old core-library API — confirm against the
	// library version in use.
	if strings.contains_rune(s, '\n') >= 0 {
		return s, false; // raw newlines are not allowed inside strings
	}
	if strings.contains_rune(s, '\\') < 0 && strings.contains_rune(s, quote) < 0 {
		if quote == '"' {
			return s, true; // no escapes: the literal can be used as-is
		}
	}
	// NOTE(review): assumes escape resolution never grows the text past
	// 1.5x its length (escapes only shrink) — same sizing Go uses.
	buf_len := 3*len(s) / 2;
	buf := make([]byte, buf_len);
	offset := 0;
	for len(s) > 0 {
		r, multiple_bytes, tail_string, ok := unquote_char(s, byte(quote));
		if !ok {
			delete(buf);
			return s, false;
		}
		s = tail_string;
		if r < 0x80 || !multiple_bytes {
			buf[offset] = byte(r);
			offset += 1;
		} else {
			b, w := utf8.encode_rune(r);
			copy(buf[offset:], b[:w]);
			offset += w;
		}
	}
	new_string := string(buf[:offset]);
	append(&p.allocated_strings, new_string);
	return new_string, true;
}
// Consume the current token iff it has the given kind; reports whether
// a token was consumed.
allow_token :: proc(p: ^Parser, kind: Kind) -> bool {
	if p.curr_token.kind != kind {
		return false;
	}
	next_token(p);
	return true;
}
// Consume the current token, reporting an error if its kind differs
// from `kind`. Returns the consumed token either way.
expect_token :: proc(p: ^Parser, kind: Kind) -> Token {
	prev := p.curr_token;
	if prev.kind != kind {
		got := prev.lit;
		if got == "\n" {
			// Auto-inserted semicolons carry "\n" as their literal;
			// show them as ";" in diagnostics.
			got = ";";
		}
		error(p, prev.pos, "Expected %s, got %s", kind_to_string[kind], got);
	}
	next_token(p);
	return prev;
}
// Consume the current token, reporting an error when it is not an
// operator. Returns the consumed token either way.
expect_operator :: proc(p: ^Parser) -> Token {
	tok := p.curr_token;
	if !is_operator(tok.kind) {
		error(p, tok.pos, "Expected an operator, got %s", tok.lit);
	}
	next_token(p);
	return tok;
}
// Error recovery: skip tokens until a safe synchronization point
// (EOF or ';') so parsing can resume.
fix_advance :: proc(p: ^Parser) {
	for {
		kind := p.curr_token.kind;
		if kind == .EOF || kind == .Semicolon {
			return;
		}
		next_token(p);
	}
}
// Deep-copy a Value: arrays and dictionaries are cloned recursively,
// every other variant is copied by value.
copy_value :: proc(value: Value) -> Value {
	#partial switch v in value {
	case Array:
		clone := make(Array, len(v));
		for elem, i in v {
			clone[i] = copy_value(elem);
		}
		return clone;
	case Dict:
		clone := make(Dict, cap(v));
		for key, elem in v {
			clone[key] = copy_value(elem);
		}
		return clone;
	}
	return value;
}
// Walk the scope stack from innermost outwards and return a deep copy
// of the first binding found for `name`.
lookup_value :: proc(p: ^Parser, name: string) -> (Value, bool) {
	idx := len(p.dict_stack)-1;
	for idx >= 0 {
		scope := p.dict_stack[idx];
		if val, found := scope[name]; found {
			return copy_value(val), true;
		}
		idx -= 1;
	}
	return nil, false;
}
// Parse a primary operand: a literal, an identifier lookup, a
// parenthesised expression, an array literal [...], or a dictionary
// literal {...}. Returns the evaluated value and its start position.
parse_operand :: proc(p: ^Parser) -> (Value, Pos) {
	tok := p.curr_token;
	#partial switch p.curr_token.kind {
	case .Ident:
		next_token(p);
		v, ok := lookup_value(p, tok.lit);
		if !ok { error(p, tok.pos, "Undeclared identifier %s", tok.lit); }
		return v, tok.pos;
	case .True:
		next_token(p);
		return true, tok.pos;
	case .False:
		next_token(p);
		return false, tok.pos;
	case .Nil:
		next_token(p);
		return Nil_Value{}, tok.pos;
	case .Integer:
		next_token(p);
		i, _ := strconv.parse_i64(tok.lit);
		return i, tok.pos;
	case .Float:
		next_token(p);
		f, _ := strconv.parse_f64(tok.lit);
		return f, tok.pos;
	case .String:
		next_token(p);
		str, ok := unquote_string(p, tok);
		if !ok { error(p, tok.pos, "Unable to unquote string"); }
		return string(str), tok.pos;
	case .Open_Paren:
		// Parenthesised sub-expression.
		expect_token(p, .Open_Paren);
		expr, _ := parse_expr(p);
		expect_token(p, .Close_Paren);
		return expr, tok.pos;
	case .Open_Bracket:
		// Array literal; elements separated by commas or newlines.
		expect_token(p, .Open_Bracket);
		elems := make([dynamic]Value, 0, 4);
		for p.curr_token.kind != .Close_Bracket &&
		    p.curr_token.kind != .EOF {
			elem, _ := parse_expr(p);
			append(&elems, elem);
			// A Semicolon token with literal "\n" is an auto-inserted
			// terminator: treat it like an optional comma.
			if p.curr_token.kind == .Semicolon && p.curr_token.lit == "\n" {
				next_token(p);
			} else if !allow_token(p, .Comma) {
				break;
			}
		}
		expect_token(p, .Close_Bracket);
		return Array(elems[:]), tok.pos;
	case .Open_Brace:
		// Dictionary literal; pushed onto the scope stack so that its
		// earlier entries are visible to later ones.
		expect_token(p, .Open_Brace);
		dict := Dict{};
		append(&p.dict_stack, &dict);
		defer pop(&p.dict_stack);
		for p.curr_token.kind != .Close_Brace &&
		    p.curr_token.kind != .EOF {
			name_tok := p.curr_token;
			if !allow_token(p, .Ident) && !allow_token(p, .String) {
				name_tok = expect_token(p, .Ident);
			}
			name, ok := unquote_string(p, name_tok);
			if !ok { error(p, tok.pos, "Unable to unquote string"); }
			expect_token(p, .Assign);
			elem, _ := parse_expr(p);
			if _, ok2 := dict[name]; ok2 {
				error(p, name_tok.pos, "Previous declaration of %s in this scope", name);
			} else {
				dict[name] = elem;
			}
			if p.curr_token.kind == .Semicolon && p.curr_token.lit == "\n" {
				next_token(p);
			} else if !allow_token(p, .Comma) {
				break;
			}
		}
		expect_token(p, .Close_Brace);
		return dict, tok.pos;
	}
	return nil, tok.pos;
}
// Apply postfix operations to `operand`: field selection (`.name`) and
// indexing (`[expr]`), repeating until neither follows.
// NOTE(review): the loop declares a shadowing `operand`; the final
// `return operand` refers to the untouched parameter, so the loop's
// updates appear to be discarded — verify against the Odin version
// this targeted before relying on selector/index results.
parse_atom_expr :: proc(p: ^Parser, operand: Value, pos: Pos) -> (Value, Pos) {
	loop := true;
	for operand := operand; loop; {
		#partial switch p.curr_token.kind {
		case .Period:
			// Field selection on a dictionary.
			next_token(p);
			tok := next_token(p);
			#partial switch tok.kind {
			case .Ident:
				d, ok := operand.(Dict);
				if !ok || d == nil {
					error(p, tok.pos, "Expected a dictionary");
					operand = nil;
					continue;
				}
				name, usok := unquote_string(p, tok);
				if !usok { error(p, tok.pos, "Unable to unquote string"); }
				val, found := d[name];
				if !found {
					error(p, tok.pos, "Field %s not found in dictionary", name);
					operand = nil;
					continue;
				}
				operand = val;
			case:
				error(p, tok.pos, "Expected a selector, got %s", tok.kind);
				operand = nil;
			}
		case .Open_Bracket:
			// Indexing: integer index for arrays, string key for dicts.
			expect_token(p, .Open_Bracket);
			index, index_pos := parse_expr(p);
			expect_token(p, .Close_Bracket);
			#partial switch a in operand {
			case Array:
				i, ok := index.(i64);
				if !ok {
					error(p, index_pos, "Index must be an integer for an array");
					operand = nil;
					continue;
				}
				if 0 <= i && i < i64(len(a)) {
					operand = a[i];
				} else {
					error(p, index_pos, "Index %d out of bounds range 0..%d", i, len(a));
					operand = nil;
					continue;
				}
			case Dict:
				key, ok := index.(string);
				if !ok {
					error(p, index_pos, "Index must be a string for a dictionary");
					operand = nil;
					continue;
				}
				val, found := a[key];
				if found {
					operand = val;
				} else {
					error(p, index_pos, "`%s` was not found in the dictionary", key);
					operand = nil;
					continue;
				}
			case:
				error(p, index_pos, "Indexing is only allowed on an array or dictionary");
			}
		case:
			loop = false;
		}
	}
	return operand, pos;
}
// Parse a prefix expression: `@"name"` lookup, unary + or -, `!`, or
// fall through to an operand with its postfix selectors/indexing.
parse_unary_expr :: proc(p: ^Parser) -> (Value, Pos) {
	op := p.curr_token;
	#partial switch p.curr_token.kind {
	case .At:
		// @"name" looks up a previously assigned (string-keyed) value.
		next_token(p);
		tok := expect_token(p, .String);
		v, ok := lookup_value(p, tok.lit);
		if !ok { error(p, tok.pos, "Undeclared identifier %s", tok.lit); }
		return parse_atom_expr(p, v, tok.pos);
	case .Add, .Sub:
		next_token(p);
		// TODO(bill): Calculate values as you go!
		expr, pos := parse_unary_expr(p);
		#partial switch e in expr {
		case i64: if op.kind == .Sub { return -e, pos; }
		case f64: if op.kind == .Sub { return -e, pos; }
		case:
			error(p, op.pos, "Unary operator %s can only be used on integers or floats", op.lit);
			return nil, op.pos;
		}
		return expr, op.pos; // unary '+' is a no-op
	case .Not:
		next_token(p);
		expr, _ := parse_unary_expr(p);
		if v, ok := expr.(bool); ok {
			return !v, op.pos;
		}
		error(p, op.pos, "Unary operator %s can only be used on booleans", op.lit);
		return nil, op.pos;
	}
	return parse_atom_expr(p, parse_operand(p));
}
// Coercion rank of a Value: 0 = no coercion (nil/array/dict),
// 1 = bool/string, 2 = i64, 3 = f64. Higher-ranked types win when
// match_values promotes operands.
value_order :: proc(v: Value) -> int {
	#partial switch _ in v {
	case f64:          return 3;
	case i64:          return 2;
	case bool, string: return 1;
	}
	return 0;
}
// Coerce `left` and `right` towards a common type (i64 promotes to
// f64). Reports whether the two values are comparable after coercion.
match_values :: proc(left, right: ^Value) -> bool {
	// Normalize so the lower-ranked value is on the left.
	if value_order(right^) < value_order(left^) {
		return match_values(right, left);
	}
	#partial switch x in left^ {
	case:
		// nil/array/dict: propagate the left value into the right slot.
		right^ = left^;
	case bool, string:
		return true;
	case i64:
		#partial switch y in right^ {
		case i64:
			return true;
		case f64:
			left^ = f64(x); // promote the integer side to float
			return true;
		}
	case f64:
		// BUG FIX: the original switched on the pointer `right` rather
		// than the pointed-to value `right^`, unlike every other type
		// switch in this procedure.
		#partial switch y in right^ {
		case f64:
			return true;
		}
	}
	return false;
}
// Evaluate `a_ op b_` after coercing the operands to a common type via
// match_values (i64 promotes to f64). Returns the result and whether
// the operator applies to those operand types. Concatenated strings are
// recorded in p.allocated_strings so destroy() can free them.
calculate_binary_value :: proc(p: ^Parser, op: Kind, a_, b_: Value) -> (Value, bool) {
	// TODO(bill): Calculate value as you go!
	x, y := a_, b_;
	match_values(&x, &y);
	#partial switch a in x {
	case: return x, true; // nil/array/dict: no operators defined
	case bool:
		b, ok := y.(bool);
		if !ok { return nil, false; }
		#partial switch op {
		case .Eq:    return a == b, true;
		case .NotEq: return a != b, true;
		case .And:   return a && b, true;
		case .Or:    return a || b, true;
		}
	case i64:
		b, ok := y.(i64);
		if !ok { return nil, false; }
		#partial switch op {
		case .Add: return a + b, true;
		case .Sub: return a - b, true;
		case .Mul: return a * b, true;
		case .Quo:
			// BUG FIX: integer division by zero would trap at runtime;
			// report it as an invalid operation instead (the caller
			// turns false into a parse error).
			if b == 0 { return nil, false; }
			return a / b, true;
		case .Rem:
			if b == 0 { return nil, false; }
			return a % b, true;
		case .Eq:    return a == b, true;
		case .NotEq: return a != b, true;
		case .Lt:    return a < b, true;
		case .Gt:    return a > b, true;
		case .LtEq:  return a <= b, true;
		case .GtEq:  return a >= b, true;
		}
	case f64:
		b, ok := y.(f64);
		if !ok { return nil, false; }
		// Float division by zero is IEEE-defined (±inf), so no guard.
		#partial switch op {
		case .Add:   return a + b, true;
		case .Sub:   return a - b, true;
		case .Mul:   return a * b, true;
		case .Quo:   return a / b, true;
		case .Eq:    return a == b, true;
		case .NotEq: return a != b, true;
		case .Lt:    return a < b, true;
		case .Gt:    return a > b, true;
		case .LtEq:  return a <= b, true;
		case .GtEq:  return a >= b, true;
		}
	case string:
		b, ok := y.(string);
		if !ok { return nil, false; }
		#partial switch op {
		case .Add:
			// Concatenate into a freshly allocated, parser-owned buffer.
			n := len(a) + len(b);
			data := make([]byte, n);
			copy(data[:], a);
			copy(data[len(a):], b);
			s := string(data);
			append(&p.allocated_strings, s);
			return s, true;
		case .Eq:    return a == b, true;
		case .NotEq: return a != b, true;
		case .Lt:    return a < b, true;
		case .Gt:    return a > b, true;
		case .LtEq:  return a <= b, true;
		case .GtEq:  return a >= b, true;
		}
	}
	return nil, false;
}
// Parse and evaluate a binary expression by precedence climbing,
// starting at level `prec_in`. Also handles the ternary `cond ? x : y`.
parse_binary_expr :: proc(p: ^Parser, prec_in: int) -> (Value, Pos) {
	expr, pos := parse_unary_expr(p);
	// Work each precedence level from the current operator's level
	// down to prec_in.
	for prec := precedence(p.curr_token.kind); prec >= prec_in; prec -= 1 {
		for {
			op := p.curr_token;
			op_prec := precedence(op.kind);
			if op_prec != prec {
				break; // operator belongs to a different level
			}
			expect_operator(p);
			if op.kind == .Question {
				// Ternary: both branches are parsed and evaluated eagerly.
				cond := expr;
				x, _ := parse_expr(p);
				expect_token(p, .Colon);
				y, _ := parse_expr(p);
				if t, ok := cond.(bool); ok {
					expr = t ? x : y;
				} else {
					error(p, pos, "Condition must be a boolean");
				}
			} else {
				// Right-hand side binds one level tighter.
				right, right_pos := parse_binary_expr(p, prec+1);
				if right == nil {
					error(p, right_pos, "Expected expression on the right-hand side of the binary operator %s", op.lit);
				}
				left := expr;
				ok: bool;
				expr, ok = calculate_binary_value(p, op.kind, left, right);
				if !ok {
					error(p, pos, "Invalid binary operation");
				}
			}
		}
	}
	return expr, pos;
}
// Parse a full expression: a binary expression starting at the lowest
// operator precedence (1).
parse_expr :: proc(p: ^Parser) -> (Value, Pos) {
	return parse_binary_expr(p, 1);
}
// Require a statement terminator after an assignment: a ';' is
// consumed, a ',' is reported then consumed, EOF is accepted silently,
// and anything else triggers error recovery via fix_advance.
expect_semicolon :: proc(p: ^Parser) {
	tok := p.curr_token;
	if tok.kind == .Comma {
		error(p, tok.pos, "Expected ';', got ','");
		next_token(p);
	} else if tok.kind == .Semicolon {
		next_token(p);
	} else if tok.kind != .EOF {
		error(p, tok.pos, "Expected ';', got %s", tok.lit);
		fix_advance(p);
	}
}
// Parse one top-level `name = expr;` assignment into the innermost
// dictionary. Returns false at EOF or after an unrecoverable error.
parse_assignment :: proc(p: ^Parser) -> bool {
	top_dict :: proc(p: ^Parser) -> ^Dict {
		assert(len(p.dict_stack) > 0);
		return p.dict_stack[len(p.dict_stack)-1];
	}
	// Skip empty statements.
	if p.curr_token.kind == .Semicolon {
		next_token(p);
		return true;
	}
	if p.curr_token.kind == .EOF {
		return false;
	}
	tok := p.curr_token;
	if allow_token(p, .Ident) || allow_token(p, .String) {
		expect_token(p, .Assign);
		name, ok := unquote_string(p, tok);
		if !ok { error(p, tok.pos, "Unable to unquote string"); }
		expr, _ := parse_expr(p);
		d := top_dict(p);
		if _, ok2 := d[name]; ok2 {
			// Re-assignment of an existing top-level name is an error.
			error(p, tok.pos, "Previous declaration of %s", name);
		} else {
			d[name] = expr;
		}
		expect_semicolon(p);
		return true;
	}
	error(p, tok.pos, "Expected an assignment, got %s", kind_to_string[tok.kind]);
	fix_advance(p);
	return false;
}

View File

@@ -1,51 +0,0 @@
/*
package cel
sample := `
x = 123;
y = 321.456;
z = x * (y - 1) / 2;
w = "foo" + "bar";
# This is a comment
asd = "Semicolons are optional"
a = {id = {b = 123}} # Dict
b = a.id.b
f = [1, 4, 9] # Array
g = f[2]
h = x < y and w == "foobar"
i = h ? 123 : "google"
j = nil
"127.0.0.1" = "value" # Keys can be strings
"foo" = {
"bar" = {
"baz" = 123, # optional commas if newline is present
"zab" = 456,
"abz" = 789,
},
};
bar = @"foo"["bar"].baz
`;
main :: proc() {
p, ok := create_from_string(sample);
if !ok {
return;
}
defer destroy(p);
if p.error_count == 0 {
print(p);
}
}
*/
package cel

View File

@@ -1,523 +0,0 @@
package cel
import "core:fmt"
import "core:unicode/utf8"
// Token kinds. The _start/_end markers delimit the categories tested by
// is_literal, is_operator, and is_keyword; the declaration order must
// stay in sync with the kind_to_string table below.
Kind :: enum {
	Illegal,
	EOF,
	Comment,

	_literal_start,
	Ident,
	Integer,
	Float,
	Char,
	String,
	_literal_end,

	_keyword_start,
	True,  // true
	False, // false
	Nil,   // nil
	_keyword_end,

	_operator_start,
	Question, // ?
	And,      // and
	Or,       // or
	Add,      // +
	Sub,      // -
	Mul,      // *
	Quo,      // /
	Rem,      // %
	Not,      // !
	Eq,       // ==
	NotEq,    // !=
	Lt,       // <
	Gt,       // >
	LtEq,     // <=
	GtEq,     // >=
	At,       // @
	_operator_end,

	_punc_start,
	Assign,        // =
	Open_Paren,    // (
	Close_Paren,   // )
	Open_Bracket,  // [
	Close_Bracket, // ]
	Open_Brace,    // {
	Close_Brace,   // }
	Colon,         // :
	Semicolon,     // ;
	Comma,         // ,
	Period,        // .
	_punc_end,
}
// A source location: 1-based line and column within `file`.
Pos :: struct {
	file:   string,
	line:   int,
	column: int,
}

// One lexical token: its kind, where it starts, and its literal text.
Token :: struct {
	kind: Kind,
	using pos: Pos,
	lit:  string,
}
// Tokenizer state over an in-memory byte buffer.
Tokenizer :: struct {
	src:  []byte,
	file: string, // May not be used

	curr_rune:   rune, // current decoded rune (utf8.RUNE_EOF at end of input)
	offset:      int,  // byte offset of curr_rune
	read_offset: int,  // byte offset just past curr_rune
	line_offset: int,  // byte offset where the current line starts
	line_count:  int,  // 1-based current line

	insert_semi: bool, // insert a semicolon before the next newline?
	error_count: int,
}
// Identifiers that lex as keywords rather than .Ident.
keywords := map[string]Kind{
	"true"  = .True,
	"false" = .False,
	"nil"   = .Nil,
	"and"   = .And,
	"or"    = .Or,
};
// Human-readable name for each Kind, indexed by enum value — the entry
// order must mirror the Kind declaration exactly (the "" entries stand
// in for the _start/_end markers).
kind_to_string := [len(Kind)]string{
	"illegal",
	"EOF",
	"comment",

	"", // _literal_start
	"identifier",
	"integer",
	"float",
	"character",
	"string",
	"", // _literal_end

	"", // _keyword_start
	"true", "false", "nil",
	"", // _keyword_end

	"", // _operator_start
	"?", "and", "or",
	"+", "-", "*", "/", "%",
	"!",
	"==", "!=", "<", ">", "<=", ">=",
	"@",
	"", // _operator_end

	"", // _punc_start
	"=",
	"(", ")",
	"[", "]",
	"{", "}",
	":", ";", ",", ".",
	"", // _punc_end
};
// Binding power of a binary/ternary operator; 0 means "not a binary
// operator", which terminates precedence climbing.
precedence :: proc(op: Kind) -> int {
	#partial switch op {
	case .Mul, .Quo, .Rem:                    return 6;
	case .Add, .Sub:                          return 5;
	case .Eq, .NotEq, .Lt, .Gt, .LtEq, .GtEq: return 4;
	case .And:                                return 3;
	case .Or:                                 return 2;
	case .Question:                           return 1;
	}
	return 0;
}
// Map an identifier to its keyword kind, or .Ident when it is not a
// keyword.
token_lookup :: proc(ident: string) -> Kind {
	kind, found := keywords[ident];
	return found ? kind : .Ident;
}
// Category predicates, based on the _start/_end markers in Kind.
is_literal  :: proc(tok: Kind) -> bool { return ._literal_start  < tok && tok < ._literal_end;  }
is_operator :: proc(tok: Kind) -> bool { return ._operator_start < tok && tok < ._operator_end; }
is_keyword  :: proc(tok: Kind) -> bool { return ._keyword_start  < tok && tok < ._keyword_end;  }
// Initialize the tokenizer over `src`, priming the first rune and
// skipping a leading byte order mark if one is present.
tokenizer_init :: proc(t: ^Tokenizer, src: []byte, file := "") {
	t.src = src;
	t.file = file;
	t.curr_rune = ' '; // dummy; replaced by the first advance
	t.offset = 0;
	t.read_offset = 0;
	t.line_offset = 0;
	t.line_count = 1;
	advance_to_next_rune(t);
	if t.curr_rune == utf8.RUNE_BOM {
		advance_to_next_rune(t); // a BOM is only legal at offset 0
	}
}
// Report a tokenizer error (printf-style) at the current position to
// stderr and bump the error count.
token_error :: proc(t: ^Tokenizer, msg: string, args: ..any) {
	fmt.eprintf("%s(%d:%d) Error: ", t.file, t.line_count, t.read_offset-t.line_offset+1);
	fmt.eprintf(msg, ..args);
	fmt.eprintln();
	t.error_count += 1;
}
// Decode the next rune of t.src into t.curr_rune, maintaining the line
// bookkeeping. Past the end of input, curr_rune becomes utf8.RUNE_EOF.
advance_to_next_rune :: proc(t: ^Tokenizer) {
	if t.read_offset < len(t.src) {
		t.offset = t.read_offset;
		if t.curr_rune == '\n' {
			t.line_offset = t.offset;
			t.line_count += 1;
		}
		r, w := rune(t.src[t.read_offset]), 1;
		switch {
		case r == 0:
			token_error(t, "Illegal character NUL");
		case r >= utf8.RUNE_SELF:
			// Multi-byte sequence: decode it properly.
			r, w = utf8.decode_rune(t.src[t.read_offset:]);
			if r == utf8.RUNE_ERROR && w == 1 {
				token_error(t, "Illegal utf-8 encoding");
			} else if r == utf8.RUNE_BOM && t.offset > 0 {
				token_error(t, "Illegal byte order mark");
			}
		}
		t.read_offset += w;
		t.curr_rune = r;
	} else {
		t.offset = len(t.src);
		if t.curr_rune == '\n' {
			t.line_offset = t.offset;
			t.line_count += 1;
		}
		t.curr_rune = utf8.RUNE_EOF;
	}
}
// Current source position of the tokenizer (1-based line and column).
get_pos :: proc(t: ^Tokenizer) -> Pos {
	pos: Pos;
	pos.file = t.file;
	pos.line = t.line_count;
	pos.column = t.offset - t.line_offset + 1;
	return pos;
}
// True for ASCII letters and '_': the characters that may start an
// identifier.
is_letter :: proc(r: rune) -> bool {
	return ('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z') || r == '_';
}
// True for ASCII decimal digits.
is_digit :: proc(r: rune) -> bool {
	return '0' <= r && r <= '9';
}
// Skip spaces, tabs, carriage returns, and form/vertical feeds. A
// newline is treated as whitespace only when no semicolon needs to be
// auto-inserted before it.
skip_whitespace :: proc(t: ^Tokenizer) {
	loop: for {
		switch t.curr_rune {
		case '\n':
			if t.insert_semi {
				break loop; // leave the '\n' for scan to turn into ';'
			}
			fallthrough;
		case ' ', '\t', '\r', '\v', '\f':
			advance_to_next_rune(t);
		case:
			break loop;
		}
	}
}
// Consume a run of letters/digits and return its text (a slice into
// t.src). The caller guarantees the current rune starts an identifier.
scan_identifier :: proc(t: ^Tokenizer) -> string {
	offset := t.offset;
	for is_letter(t.curr_rune) || is_digit(t.curr_rune) {
		advance_to_next_rune(t);
	}
	return string(t.src[offset : t.offset]);
}
// Numeric value of a hexadecimal digit; returns 16 (one past the
// largest valid digit) for any rune that is not a hex digit, so
// `digit_value(r) < base` works as a validity test.
digit_value :: proc(r: rune) -> int {
	switch {
	case '0' <= r && r <= '9': return int(r - '0');
	case 'a' <= r && r <= 'f': return int(r - 'a' + 10);
	case 'A' <= r && r <= 'F': return int(r - 'A' + 10);
	}
	return 16;
}
// Scan an integer or float literal. `seen_decimal_point` is true when
// the caller already consumed a leading '.' (e.g. ".5"). Returns the
// token kind (.Integer or .Float) and the literal text.
scan_number :: proc(t: ^Tokenizer, seen_decimal_point: bool) -> (Kind, string) {
	// Consume digits valid in `base`; '_' separators are allowed.
	scan_mantissa :: proc(t: ^Tokenizer, base: int) {
		for digit_value(t.curr_rune) < base || t.curr_rune == '_' {
			advance_to_next_rune(t);
		}
	}
	// Consume an optional exponent part, upgrading the kind to .Float.
	scan_exponent :: proc(t: ^Tokenizer, tok: Kind, offset: int) -> (kind: Kind, text: string) {
		kind = tok;
		if t.curr_rune == 'e' || t.curr_rune == 'E' {
			kind = .Float;
			advance_to_next_rune(t);
			if t.curr_rune == '-' || t.curr_rune == '+' {
				advance_to_next_rune(t);
			}
			if digit_value(t.curr_rune) < 10 {
				scan_mantissa(t, 10);
			} else {
				token_error(t, "Illegal floating point exponent");
			}
		}
		text = string(t.src[offset : t.offset]);
		return;
	}
	// Consume an optional fractional part, then the exponent.
	scan_fraction :: proc(t: ^Tokenizer, tok: Kind, offset: int) -> (kind: Kind, text: string) {
		kind = tok;
		if t.curr_rune == '.' {
			kind = .Float;
			advance_to_next_rune(t);
			scan_mantissa(t, 10);
		}
		return scan_exponent(t, kind, offset);
	}

	offset := t.offset;
	tok := Kind.Integer;
	if seen_decimal_point {
		offset -= 1; // include the already-consumed '.'
		tok = .Float;
		scan_mantissa(t, 10);
		return scan_exponent(t, tok, offset);
	}
	if t.curr_rune == '0' {
		// Leading zero: binary/octal/hex prefix, or a plain number.
		offset = t.offset;
		advance_to_next_rune(t);
		switch t.curr_rune {
		case 'b', 'B':
			advance_to_next_rune(t);
			scan_mantissa(t, 2);
			if t.offset - offset <= 2 {
				token_error(t, "Illegal binary number"); // bare "0b"
			}
		case 'o', 'O':
			advance_to_next_rune(t);
			scan_mantissa(t, 8);
			if t.offset - offset <= 2 {
				token_error(t, "Illegal octal number");
			}
		case 'x', 'X':
			advance_to_next_rune(t);
			scan_mantissa(t, 16);
			if t.offset - offset <= 2 {
				token_error(t, "Illegal hexadecimal number");
			}
		case:
			scan_mantissa(t, 10);
			switch t.curr_rune {
			case '.', 'e', 'E':
				return scan_fraction(t, tok, offset);
			}
		}
		return tok, string(t.src[offset:t.offset]);
	}
	scan_mantissa(t, 10);
	return scan_fraction(t, tok, offset);
}
// Produce the next token from the input, auto-inserting semicolons at
// newlines/EOF after statement-ending tokens (Go-style).
scan :: proc(t: ^Tokenizer) -> Token {
	skip_whitespace(t);

	offset := t.offset;
	tok: Kind;
	pos := get_pos(t);
	lit: string;
	insert_semi := false; // should a following newline become ';'?

	switch r := t.curr_rune; {
	case is_letter(r):
		insert_semi = true;
		lit = scan_identifier(t);
		tok = .Ident;
		if len(lit) > 1 {
			// Single-letter identifiers can never be keywords.
			tok = token_lookup(lit);
		}
	case '0' <= r && r <= '9':
		insert_semi = true;
		tok, lit = scan_number(t, false);
	case:
		advance_to_next_rune(t);
		switch r {
		case -1: // end of input
			if t.insert_semi {
				t.insert_semi = false;
				return Token{.Semicolon, pos, "\n"};
			}
			return Token{.EOF, pos, "\n"};
		case '\n':
			// Only reachable when insert_semi was set; skip_whitespace
			// consumes newlines otherwise.
			t.insert_semi = false;
			return Token{.Semicolon, pos, "\n"};
		case '"':
			insert_semi = true;
			quote := r;
			tok = .String;
			for {
				this_r := t.curr_rune;
				// BUG FIX: the original tested `r < 0`, but `r` is the
				// opening quote and never negative, so an unterminated
				// string at EOF looped forever. Test the current rune.
				if this_r == '\n' || this_r < 0 {
					token_error(t, "String literal not terminated");
					break;
				}
				advance_to_next_rune(t);
				if this_r == quote {
					break;
				}
				// TODO(bill); Handle properly
				if this_r == '\\' && t.curr_rune == quote {
					advance_to_next_rune(t);
				}
			}
			lit = string(t.src[offset+1:t.offset-1]); // strip the quotes
		case '#':
			// Line comment: skip to end of line, then re-scan.
			for t.curr_rune != '\n' && t.curr_rune >= 0 {
				advance_to_next_rune(t);
			}
			if t.insert_semi {
				t.insert_semi = false;
				return Token{.Semicolon, pos, "\n"};
			}
			// Recursive!
			return scan(t);
		case '?': tok = .Question;
		case ':': tok = .Colon;
		case '@': tok = .At;
		case ';':
			tok = .Semicolon;
			lit = ";";
		case ',': tok = .Comma;
		case '(':
			tok = .Open_Paren;
		case ')':
			insert_semi = true;
			tok = .Close_Paren;
		case '[':
			tok = .Open_Bracket;
		case ']':
			insert_semi = true;
			tok = .Close_Bracket;
		case '{':
			tok = .Open_Brace;
		case '}':
			insert_semi = true;
			tok = .Close_Brace;
		case '+': tok = .Add;
		case '-': tok = .Sub;
		case '*': tok = .Mul;
		case '/': tok = .Quo;
		case '%': tok = .Rem;
		case '!':
			tok = .Not;
			if t.curr_rune == '=' {
				advance_to_next_rune(t);
				tok = .NotEq;
			}
		case '=':
			tok = .Assign;
			if t.curr_rune == '=' {
				advance_to_next_rune(t);
				tok = .Eq;
			}
		case '<':
			tok = .Lt;
			if t.curr_rune == '=' {
				advance_to_next_rune(t);
				tok = .LtEq;
			}
		case '>':
			tok = .Gt;
			if t.curr_rune == '=' {
				advance_to_next_rune(t);
				tok = .GtEq;
			}
		case '.':
			if '0' <= t.curr_rune && t.curr_rune <= '9' {
				// A float like ".5"; the '.' was already consumed.
				insert_semi = true;
				tok, lit = scan_number(t, true);
			} else {
				tok = .Period;
			}
		case:
			if r != utf8.RUNE_BOM {
				token_error(t, "Illegal character '%r'", r);
			}
			insert_semi = t.insert_semi; // preserve the previous state
			tok = .Illegal;
		}
	}

	t.insert_semi = insert_semi;
	if lit == "" {
		lit = string(t.src[offset:t.offset]);
	}
	return Token{tok, pos, lit};
}