mirror of
https://github.com/odin-lang/Odin.git
synced 2025-12-29 09:24:33 +00:00
524 lines
9.0 KiB
Odin
524 lines
9.0 KiB
Odin
package cel
|
|
|
|
import "core:fmt"
|
|
import "core:unicode/utf8"
|
|
|
|
// Kind enumerates every class of token the tokenizer can produce.
//
// Members whose names begin with an underscore are not real tokens: they are
// range sentinels that bracket a group so that is_literal, is_keyword and
// is_operator can classify a Kind with two comparisons. The declaration order
// therefore matters, and must stay in step with the kind_to_string table.
using Kind :: enum {
	// Special tokens
	Illegal,
	EOF,
	Comment,

	// Literals
	_literal_start,
		Ident,
		Integer,
		Float,
		Char,
		String,
	_literal_end,

	// Keywords
	_keyword_start,
		True,  // true
		False, // false
		Nil,   // nil
	_keyword_end,

	// Operators
	_operator_start,
		Question, // ?

		And, // and
		Or,  // or

		Add, // +
		Sub, // -
		Mul, // *
		Quo, // /
		Rem, // %

		Not, // !

		Eq,    // ==
		NotEq, // !=
		Lt,    // <
		Gt,    // >
		LtEq,  // <=
		GtEq,  // >=

		At, // @
	_operator_end,

	// Punctuation
	_punc_start,
		Assign, // =

		Open_Paren,    // (
		Close_Paren,   // )
		Open_Bracket,  // [
		Close_Bracket, // ]
		Open_Brace,    // {
		Close_Brace,   // }

		Colon,     // :
		Semicolon, // ;
		Comma,     // ,
		Period,    // .
	_punc_end,
}
|
|
|
|
|
|
// Pos identifies a location in the source text.
// get_pos fills `line` from the tokenizer's line counter (which starts at 1)
// and `column` as a 1-based byte distance from the start of that line.
Pos :: struct {
	file:   string, // name given to tokenizer_init; may be empty
	line:   int,
	column: int,
}
|
|
|
|
// Token is a single lexical element returned by scan.
// The field order is significant: scan builds tokens with positional
// struct literals of the form Token{kind, pos, lit}.
Token :: struct {
	kind:      Kind,   // classification of the token
	using pos: Pos,    // where the token starts; fields are promoted onto Token
	lit:       string, // text associated with the token
}
|
|
|
|
// Tokenizer holds the scanning state for one source buffer.
// Initialise it with tokenizer_init and pull tokens from it with scan.
Tokenizer :: struct {
	src:  []byte, // source text being scanned
	file: string, // May not be used

	curr_rune:   rune, // rune currently under the cursor (utf8.RUNE_EOF at end of input)
	offset:      int,  // byte offset of curr_rune within src
	read_offset: int,  // byte offset of the next rune to decode
	line_offset: int,  // byte offset at which the current line begins
	line_count:  int,  // current line number, starting at 1

	insert_semi: bool, // when set, the next newline or EOF is reported as a Semicolon

	error_count: int,  // number of errors reported through token_error
}
|
|
|
|
|
|
// keywords maps reserved words to their token kinds.
// It is consulted by token_lookup; any identifier not present here is an Ident.
// Note that `and` / `or` are word-spelled operators rather than Kind keywords.
keywords := map[string]Kind{
	// literal keywords
	"true"  = True,
	"false" = False,
	"nil"   = Nil,

	// logical operators
	"and" = And,
	"or"  = Or,
};
|
|
|
|
// kind_to_string gives a printable name for each Kind.
// Entries are positional and must follow the declaration order of Kind
// exactly; the range sentinels map to the empty string.
kind_to_string := [len(Kind)]string{
	"illegal",    // Illegal
	"EOF",        // EOF
	"comment",    // Comment

	"",           // _literal_start
	"identifier", // Ident
	"integer",    // Integer
	"float",      // Float
	"character",  // Char
	"string",     // String
	"",           // _literal_end

	"",           // _keyword_start
	"true",       // True
	"false",      // False
	"nil",        // Nil
	"",           // _keyword_end

	"",           // _operator_start
	"?",          // Question
	"and",        // And
	"or",         // Or
	"+",          // Add
	"-",          // Sub
	"*",          // Mul
	"/",          // Quo
	"%",          // Rem
	"!",          // Not
	"==",         // Eq
	"!=",         // NotEq
	"<",          // Lt
	">",          // Gt
	"<=",         // LtEq
	">=",         // GtEq
	"@",          // At
	"",           // _operator_end

	"",           // _punc_start
	"=",          // Assign
	"(",          // Open_Paren
	")",          // Close_Paren
	"[",          // Open_Bracket
	"]",          // Close_Bracket
	"{",          // Open_Brace
	"}",          // Close_Brace
	":",          // Colon
	";",          // Semicolon
	",",          // Comma
	".",          // Period
	"",           // _punc_end
};
|
|
|
|
// precedence returns the binding strength of `op` when used as a binary
// (or ternary `?`) operator. Larger numbers bind tighter. Any kind that is
// not listed yields 0.
precedence :: proc(op: Kind) -> int {
	level := 0;

	#partial switch op {
	case Question:
		level = 1;
	case Or:
		level = 2;
	case And:
		level = 3;
	case Eq, NotEq, Lt, Gt, LtEq, GtEq:
		level = 4;
	case Add, Sub:
		level = 5;
	case Mul, Quo, Rem:
		level = 6;
	}

	return level;
}
|
|
|
|
|
|
// token_lookup classifies an identifier-shaped word: it returns the matching
// entry from the `keywords` table when there is one, and Ident otherwise.
token_lookup :: proc(ident: string) -> Kind {
	kind, found := keywords[ident];
	if !found {
		return Ident;
	}
	return kind;
}
|
|
|
|
// is_literal reports whether `tok` lies strictly between the literal sentinels.
is_literal :: proc(tok: Kind) -> bool {
	return tok > _literal_start && tok < _literal_end;
}
|
|
// is_operator reports whether `tok` lies strictly between the operator sentinels.
is_operator :: proc(tok: Kind) -> bool {
	return tok > _operator_start && tok < _operator_end;
}
|
|
// is_keyword reports whether `tok` lies strictly between the keyword sentinels.
is_keyword :: proc(tok: Kind) -> bool {
	return tok > _keyword_start && tok < _keyword_end;
}
|
|
|
|
|
|
// tokenizer_init prepares `t` to scan `src` from the beginning.
//
// Parameters:
//   t    - tokenizer to (re)initialise
//   src  - source text to scan
//   file - optional name used in positions and error messages
//
// All scanning state is reset, including `insert_semi` and `error_count`, so
// that a Tokenizer value can be reused for another buffer. Without that, a
// stale `insert_semi` left over from a previous scan would make a leading
// newline in the new source come back as a spurious Semicolon, and errors
// from the previous source would be counted against the new one.
//
// The first rune is loaded eagerly, and a byte order mark at the very start
// of the input is skipped.
tokenizer_init :: proc(t: ^Tokenizer, src: []byte, file := "") {
	t.src  = src;
	t.file = file;

	t.curr_rune   = ' ';
	t.offset      = 0;
	t.read_offset = 0;
	t.line_offset = 0;
	t.line_count  = 1;

	// Must happen before the first advance, which may already report an error.
	t.insert_semi = false;
	t.error_count = 0;

	advance_to_next_rune(t);
	if t.curr_rune == utf8.RUNE_BOM {
		advance_to_next_rune(t);
	}
}
|
|
|
|
// token_error writes a formatted diagnostic to standard error and bumps the
// tokenizer's error counter.
//
// The message is prefixed with "file(line:column) Error: ". The column is
// computed from `read_offset` (the position of the next rune to decode), not
// from `offset`.
token_error :: proc(t: ^Tokenizer, msg: string, args: ..any) {
	column := t.read_offset - t.line_offset + 1;

	fmt.eprintf("%s(%d:%d) Error: ", t.file, t.line_count, column);
	fmt.eprintf(msg, ..args);
	fmt.eprintln();

	t.error_count += 1;
}
|
|
|
|
// advance_to_next_rune moves the cursor one rune forward.
//
// On return `curr_rune` holds the rune at `offset`, or utf8.RUNE_EOF once the
// input is exhausted. If the rune being left behind was a newline, the line
// bookkeeping is updated first, so any error reported while decoding the new
// rune is attributed to the new line.
//
// Errors are reported (but scanning continues) for NUL bytes, invalid UTF-8,
// and a byte order mark anywhere other than offset 0.
advance_to_next_rune :: proc(t: ^Tokenizer) {
	at_end := t.read_offset >= len(t.src);

	if at_end {
		t.offset = len(t.src);
	} else {
		t.offset = t.read_offset;
	}

	// The previous rune ended a line: the new offset is where the next line starts.
	if t.curr_rune == '\n' {
		t.line_offset = t.offset;
		t.line_count += 1;
	}

	if at_end {
		t.curr_rune = utf8.RUNE_EOF;
		return;
	}

	// Fast path: assume a single-byte rune, and decode properly only if needed.
	next  := rune(t.src[t.read_offset]);
	width := 1;

	if next == 0 {
		token_error(t, "Illegal character NUL");
	} else if next >= utf8.RUNE_SELF {
		next, width = utf8.decode_rune(t.src[t.read_offset:]);
		if next == utf8.RUNE_ERROR && width == 1 {
			token_error(t, "Illegal utf-8 encoding");
		} else if next == utf8.RUNE_BOM && t.offset > 0 {
			token_error(t, "Illegal byte order mark");
		}
	}

	t.read_offset += width;
	t.curr_rune = next;
}
|
|
|
|
|
|
// get_pos returns the position of the rune currently under the cursor:
// the tokenizer's file name, its current line number, and a 1-based column
// measured in bytes from the start of the line.
get_pos :: proc(t: ^Tokenizer) -> Pos {
	pos: Pos;
	pos.file   = t.file;
	pos.line   = t.line_count;
	pos.column = t.offset - t.line_offset + 1;
	return pos;
}
|
|
|
|
// is_letter reports whether `r` may appear in an identifier as a letter:
// an ASCII letter of either case, or an underscore.
is_letter :: proc(r: rune) -> bool {
	if r == '_' {
		return true;
	}
	is_lower := 'a' <= r && r <= 'z';
	is_upper := 'A' <= r && r <= 'Z';
	return is_lower || is_upper;
}
|
|
|
|
// is_digit reports whether `r` is an ASCII decimal digit.
is_digit :: proc(r: rune) -> bool {
	return '0' <= r && r <= '9';
}
|
|
|
|
// skip_whitespace advances past spaces, tabs, carriage returns, vertical tabs
// and form feeds. Newlines are skipped as well, unless `insert_semi` is set,
// in which case the cursor is left on the newline so that scan can turn it
// into a Semicolon.
skip_whitespace :: proc(t: ^Tokenizer) {
	for {
		r := t.curr_rune;

		if r == '\n' {
			// A significant newline terminates the statement; leave it in place.
			if t.insert_semi {
				return;
			}
		} else if r != ' ' && r != '\t' && r != '\r' && r != '\v' && r != '\f' {
			return;
		}

		advance_to_next_rune(t);
	}
}
|
|
|
|
// scan_identifier consumes a run of letters and digits starting at the
// cursor and returns it as a string that aliases the source buffer.
// The caller is expected to have checked that the first rune is a letter.
scan_identifier :: proc(t: ^Tokenizer) -> string {
	start := t.offset;

	for {
		r := t.curr_rune;
		if !is_letter(r) && !is_digit(r) {
			break;
		}
		advance_to_next_rune(t);
	}

	return string(t.src[start : t.offset]);
}
|
|
|
|
// digit_value returns the numeric value of `r` interpreted as a digit in
// base 16 or lower (0-9, a-f, A-F). Any other rune yields 16, which is
// larger than every valid digit value and so fails a `< base` test for all
// supported bases.
digit_value :: proc(r: rune) -> int {
	if '0' <= r && r <= '9' {
		return int(r - '0');
	}
	if 'a' <= r && r <= 'f' {
		return int(r - 'a' + 10);
	}
	if 'A' <= r && r <= 'F' {
		return int(r - 'A' + 10);
	}
	return 16;
}
|
|
|
|
// scan_number scans an integer or floating point literal.
//
// Parameters:
//   t                  - tokenizer positioned on the first digit
//   seen_decimal_point - true when the caller has already consumed a leading
//                        '.', in which case the literal is a Float whose text
//                        starts one byte before the cursor
//
// Returns the literal's kind (Integer or Float) and its text, which aliases
// the source buffer.
//
// Recognised forms: decimal digits with an optional fraction and exponent,
// and 0b / 0o / 0x prefixed binary, octal and hexadecimal integers.
// Underscores are accepted anywhere a digit is.
scan_number :: proc(t: ^Tokenizer, seen_decimal_point: bool) -> (Kind, string) {
	// Consumes digits valid in `base`, together with '_' separators.
	scan_mantissa :: proc(t: ^Tokenizer, base: int) {
		for {
			if t.curr_rune != '_' && digit_value(t.curr_rune) >= base {
				return;
			}
			advance_to_next_rune(t);
		}
	}

	// Consumes an optional exponent and finishes the literal that began at `offset`.
	scan_exponent :: proc(t: ^Tokenizer, tok: Kind, offset: int) -> (kind: Kind, text: string) {
		kind = tok;

		has_exponent := t.curr_rune == 'e' || t.curr_rune == 'E';
		if has_exponent {
			kind = Float;
			advance_to_next_rune(t);

			// Optional sign
			switch t.curr_rune {
			case '-', '+':
				advance_to_next_rune(t);
			}

			if digit_value(t.curr_rune) >= 10 {
				token_error(t, "Illegal floating point exponent");
			} else {
				scan_mantissa(t, 10);
			}
		}

		text = string(t.src[offset : t.offset]);
		return;
	}

	// Consumes an optional fractional part, then defers to scan_exponent.
	scan_fraction :: proc(t: ^Tokenizer, tok: Kind, offset: int) -> (kind: Kind, text: string) {
		kind = tok;

		has_fraction := t.curr_rune == '.';
		if has_fraction {
			kind = Float;
			advance_to_next_rune(t);
			scan_mantissa(t, 10);
		}

		return scan_exponent(t, kind, offset);
	}

	start := t.offset;

	if seen_decimal_point {
		// The '.' was consumed by the caller and sits one byte before the cursor.
		scan_mantissa(t, 10);
		return scan_exponent(t, Float, start - 1);
	}

	if t.curr_rune != '0' {
		scan_mantissa(t, 10);
		return scan_fraction(t, Integer, start);
	}

	// Leading zero: either a base prefix or an ordinary decimal number.
	advance_to_next_rune(t);

	base: int;
	message: string;

	switch t.curr_rune {
	case 'b', 'B':
		base = 2;
		message = "Illegal binary number";
	case 'o', 'O':
		base = 8;
		message = "Illegal octal number";
	case 'x', 'X':
		base = 16;
		message = "Illegal hexadecimal number";
	case:
		scan_mantissa(t, 10);
		switch t.curr_rune {
		case '.', 'e', 'E':
			return scan_fraction(t, Integer, start);
		}
		return Integer, string(t.src[start : t.offset]);
	}

	// Prefixed integer: skip the base letter, then require something after "0?".
	advance_to_next_rune(t);
	scan_mantissa(t, base);
	if t.offset - start <= 2 {
		token_error(t, message);
	}

	return Integer, string(t.src[start : t.offset]);
}
|
|
|
|
// scan returns the next token from the source.
//
// Whitespace is skipped first. Comments run from '#' to the end of the line
// and are never returned as tokens. Once the input is exhausted, every call
// returns an EOF token.
//
// Automatic semicolons: after an identifier, keyword, number, string, or a
// closing ')', ']' or '}', the tokenizer records that a following newline (or
// end of input) terminates the statement, and reports that newline as a
// Semicolon token whose text is "\n".
//
// Token text (`lit`):
//   - identifiers, keywords and numbers: the source text;
//   - strings: the text between the quotes, with escapes left unprocessed;
//   - everything else: the source text of the token.
// NOTE: an empty string literal produces an empty `lit`, which then falls
// into the "use the source text" default below and ends up as the two quote
// characters. That existing behaviour is kept as is.
//
// Errors (unterminated string, illegal character) are reported through
// token_error and scanning continues.
scan :: proc(t: ^Tokenizer) -> Token {
	skip_whitespace(t);

	offset := t.offset;

	tok: Kind;
	pos := get_pos(t);
	lit: string;

	insert_semi := false;

	switch r := t.curr_rune; {
	case is_letter(r):
		insert_semi = true;
		lit = scan_identifier(t);
		tok = Ident;
		// Every keyword is longer than one character, so skip the lookup otherwise.
		if len(lit) > 1 {
			tok = token_lookup(lit);
		}

	case '0' <= r && r <= '9':
		insert_semi = true;
		tok, lit = scan_number(t, false);

	case:
		// Always make progress; `r` still holds the rune being classified.
		advance_to_next_rune(t);
		switch r {
		case -1:
			if t.insert_semi {
				t.insert_semi = false;
				return Token{Semicolon, pos, "\n"};
			}
			return Token{EOF, pos, "\n"};

		case '\n':
			// Only reachable when skip_whitespace stopped on a significant newline.
			t.insert_semi = false;
			return Token{Semicolon, pos, "\n"};

		case '"':
			insert_semi = true;
			quote := r;
			tok = String;

			terminated := false;
			for {
				this_r := t.curr_rune;
				// Stop at end of line or end of input. The test must look at the
				// rune under the cursor: `r` is the opening quote and never negative.
				if this_r == '\n' || this_r < 0 {
					token_error(t, "String literal not terminated");
					break;
				}
				advance_to_next_rune(t);
				if this_r == quote {
					terminated = true;
					break;
				}
				// TODO(bill); Handle properly
				if this_r == '\\' && t.curr_rune == quote {
					advance_to_next_rune(t);
				}
			}

			if terminated {
				// Strip the opening and closing quotes.
				lit = string(t.src[offset+1 : t.offset-1]);
			} else {
				// No closing quote was consumed: keep everything after the opening
				// quote. Subtracting one here would drop a character, or form an
				// invalid slice when nothing follows the quote.
				lit = string(t.src[offset+1 : t.offset]);
			}

		case '#':
			// Line comment: consume up to, but not including, the newline.
			for t.curr_rune != '\n' && t.curr_rune >= 0 {
				advance_to_next_rune(t);
			}
			if t.insert_semi {
				t.insert_semi = false;
				return Token{Semicolon, pos, "\n"};
			}
			// Recursive!
			return scan(t);

		case '?': tok = Question;
		case ':': tok = Colon;
		case '@': tok = At;

		case ';':
			tok = Semicolon;
			lit = ";";
		case ',': tok = Comma;

		case '(':
			tok = Open_Paren;
		case ')':
			insert_semi = true;
			tok = Close_Paren;

		case '[':
			tok = Open_Bracket;
		case ']':
			insert_semi = true;
			tok = Close_Bracket;

		case '{':
			tok = Open_Brace;
		case '}':
			insert_semi = true;
			tok = Close_Brace;

		case '+': tok = Add;
		case '-': tok = Sub;
		case '*': tok = Mul;
		case '/': tok = Quo;
		case '%': tok = Rem;

		case '!':
			tok = Not;
			if t.curr_rune == '=' {
				advance_to_next_rune(t);
				tok = NotEq;
			}

		case '=':
			tok = Assign;
			if t.curr_rune == '=' {
				advance_to_next_rune(t);
				tok = Eq;
			}

		case '<':
			tok = Lt;
			if t.curr_rune == '=' {
				advance_to_next_rune(t);
				tok = LtEq;
			}

		case '>':
			tok = Gt;
			if t.curr_rune == '=' {
				advance_to_next_rune(t);
				tok = GtEq;
			}

		case '.':
			// ".5" style float; otherwise a plain period.
			if '0' <= t.curr_rune && t.curr_rune <= '9' {
				insert_semi = true;
				tok, lit = scan_number(t, true);
			} else {
				tok = Period;
			}

		case:
			// A misplaced BOM has already been reported by advance_to_next_rune.
			if r != utf8.RUNE_BOM {
				token_error(t, "Illegal character '%r'", r);
			}
			// An illegal rune does not change whether a semicolon is pending.
			insert_semi = t.insert_semi;
			tok = Illegal;
		}
	}

	t.insert_semi = insert_semi;

	if lit == "" {
		lit = string(t.src[offset : t.offset]);
	}

	return Token{tok, pos, lit};
}
|