From 9cadd5846582dcaf0c9c5309eaa35cf2e08ec94f Mon Sep 17 00:00:00 2001 From: gingerBill Date: Tue, 28 May 2019 14:44:32 +0100 Subject: [PATCH] Improve tokenizer slightly --- core/odin/tokenizer/tokenizer.odin | 15 ++++++++++++--- src/tokenizer.cpp | 21 +++++++++++++-------- 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/core/odin/tokenizer/tokenizer.odin b/core/odin/tokenizer/tokenizer.odin index fca475839..29ec9d0a4 100644 --- a/core/odin/tokenizer/tokenizer.odin +++ b/core/odin/tokenizer/tokenizer.odin @@ -98,9 +98,9 @@ advance_rune :: proc(using t: ^Tokenizer) { } } -peek_byte :: proc(using t: ^Tokenizer) -> byte { - if read_offset < len(src) { - return src[read_offset]; +peek_byte :: proc(using t: ^Tokenizer, offset := 0) -> byte { + if read_offset+offset < len(src) { + return src[read_offset+offset]; } return 0; } @@ -577,6 +577,8 @@ scan :: proc(t: ^Tokenizer) -> token.Token { case '≠': kind = token.Not_Eq; case '≤': kind = token.Lt_Eq; case '≥': kind = token.Gt_Eq; + case '∈': kind = token.In; + case '∉': kind = token.Notin; case '.': if '0' <= t.ch && t.ch <= '9' { @@ -586,6 +588,10 @@ scan :: proc(t: ^Tokenizer) -> token.Token { if t.ch == '.' { advance_rune(t); kind = token.Ellipsis; + if t.ch == '<' { + advance_rune(t); + kind = token.Range_Half; + } } } case ':': kind = token.Colon; @@ -597,6 +603,9 @@ scan :: proc(t: ^Tokenizer) -> token.Token { case ']': kind = token.Close_Bracket; case '{': kind = token.Open_Brace; case '}': kind = token.Close_Brace; + + case '\\': kind = token.Back_Slash; + case: if ch != utf8.RUNE_BOM { error(t, t.offset, "illegal character '%r': %d", ch, ch); diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp index dd9aa109c..a551f0545 100644 --- a/src/tokenizer.cpp +++ b/src/tokenizer.cpp @@ -625,6 +625,13 @@ gb_inline void scan_mantissa(Tokenizer *t, i32 base) { } } +u8 peek_byte(Tokenizer *t, isize offset=0) { + if (t->read_curr+offset < t->end) { + return t->read_curr[offset]; + } + return 0; +} + Token scan_number_to_token(Tokenizer *t, bool seen_decimal_point) { Token token = {}; token.kind = Token_Integer; @@ -718,14 +725,12 @@ Token scan_number_to_token(Tokenizer *t, bool seen_decimal_point) { fraction: if (t->curr_rune == '.') { - // HACK(bill): This may be inefficient - TokenizerState state = save_tokenizer_state(t); - advance_to_next_rune(t); - if (t->curr_rune == '.') { - // TODO(bill): Clean up this shit - restore_tokenizer_state(t, &state); + if (peek_byte(t) == '.') { + // NOTE(bill): this is kind of ellipsis goto end; } + advance_to_next_rune(t); + token.kind = Token_Float; scan_mantissa(t, 10); } @@ -983,7 +988,7 @@ Token tokenizer_get_token(Tokenizer *t) { } break; case '.': - if (t->curr_rune == '.') { // Could be an ellipsis + if (t->curr_rune == '.') { advance_to_next_rune(t); token.kind = Token_Ellipsis; if (t->curr_rune == '<') { @@ -1039,7 +1044,7 @@ Token tokenizer_get_token(Tokenizer *t) { if (t->curr_rune == '=') { advance_to_next_rune(t); token.kind = Token_SubEq; - } else if (t->curr_rune == '-' && t->read_curr[0] == '-') { + } else if (t->curr_rune == '-' && peek_byte(t) == '-') { advance_to_next_rune(t); advance_to_next_rune(t); token.kind = Token_Undef;