Improve tokenizer slightly

This commit is contained in:
gingerBill
2019-05-28 14:44:32 +01:00
parent 65e9b4d5f0
commit 9cadd58465
2 changed files with 25 additions and 11 deletions

View File

@@ -98,9 +98,9 @@ advance_rune :: proc(using t: ^Tokenizer) {
}
}
peek_byte :: proc(using t: ^Tokenizer) -> byte {
if read_offset < len(src) {
return src[read_offset];
// Returns the byte `offset` positions beyond the current read offset
// without advancing the tokenizer; returns 0 when that index is past
// the end of src. `offset` defaults to 0 (peek at the next byte).
peek_byte :: proc(using t: ^Tokenizer, offset := 0) -> byte {
if read_offset+offset < len(src) {
return src[read_offset+offset];
}
return 0;
}
@@ -577,6 +577,8 @@ scan :: proc(t: ^Tokenizer) -> token.Token {
case '≠': kind = token.Not_Eq;
case '≤': kind = token.Lt_Eq;
case '≥': kind = token.Gt_Eq;
case '∈': kind = token.In;
case '∉': kind = token.Notin;
case '.':
if '0' <= t.ch && t.ch <= '9' {
@@ -586,6 +588,10 @@ scan :: proc(t: ^Tokenizer) -> token.Token {
if t.ch == '.' {
advance_rune(t);
kind = token.Ellipsis;
if t.ch == '<' {
advance_rune(t);
kind = token.Range_Half;
}
}
}
case ':': kind = token.Colon;
@@ -597,6 +603,9 @@ scan :: proc(t: ^Tokenizer) -> token.Token {
case ']': kind = token.Close_Bracket;
case '{': kind = token.Open_Brace;
case '}': kind = token.Close_Brace;
case '\\': kind = token.Back_Slash;
case:
if ch != utf8.RUNE_BOM {
error(t, t.offset, "illegal character '%r': %d", ch, ch);

View File

@@ -625,6 +625,13 @@ gb_inline void scan_mantissa(Tokenizer *t, i32 base) {
}
}
// Look ahead `offset` bytes past the current read cursor without consuming
// input; returns 0 once read_curr+offset reaches t->end (end of buffer).
// `offset` defaults to 0 (peek at the very next byte).
u8 peek_byte(Tokenizer *t, isize offset=0) {
if (t->read_curr+offset < t->end) {
return t->read_curr[offset];
}
return 0;
}
Token scan_number_to_token(Tokenizer *t, bool seen_decimal_point) {
Token token = {};
token.kind = Token_Integer;
@@ -718,14 +725,12 @@ Token scan_number_to_token(Tokenizer *t, bool seen_decimal_point) {
fraction:
if (t->curr_rune == '.') {
// HACK(bill): This may be inefficient
TokenizerState state = save_tokenizer_state(t);
advance_to_next_rune(t);
if (t->curr_rune == '.') {
// TODO(bill): Clean up this shit
restore_tokenizer_state(t, &state);
if (peek_byte(t) == '.') {
// NOTE(bill): this is kind of ellipsis
goto end;
}
advance_to_next_rune(t);
token.kind = Token_Float;
scan_mantissa(t, 10);
}
@@ -983,7 +988,7 @@ Token tokenizer_get_token(Tokenizer *t) {
} break;
case '.':
if (t->curr_rune == '.') { // Could be an ellipsis
if (t->curr_rune == '.') {
advance_to_next_rune(t);
token.kind = Token_Ellipsis;
if (t->curr_rune == '<') {
@@ -1039,7 +1044,7 @@ Token tokenizer_get_token(Tokenizer *t) {
if (t->curr_rune == '=') {
advance_to_next_rune(t);
token.kind = Token_SubEq;
} else if (t->curr_rune == '-' && t->read_curr[0] == '-') {
} else if (t->curr_rune == '-' && peek_byte(t) == '-') {
advance_to_next_rune(t);
advance_to_next_rune(t);
token.kind = Token_Undef;