Add experimental -insert-semicolon functionality to tokenizer and parser

gingerBill
2020-11-01 15:10:06 +00:00
parent 75e8e5e06f
commit 54fbdabc38
11 changed files with 2253 additions and 42 deletions

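The rule being added is the Go-style newline rule: the tokenizer remembers whether the token it just produced could legally end a statement (an identifier, a literal, return/break/continue/fallthrough, or a closing paren, bracket, or brace), and when it next sees a newline with that flag set it emits a Token_Semicolon whose string is "\n" instead of skipping the newline as whitespace. The standalone C++ sketch below illustrates only that rule, for orientation; it is not the compiler's code, and every name in it is invented for this example.

#include <cctype>
#include <iostream>
#include <string>
#include <vector>

// Toy scanner demonstrating newline-to-semicolon insertion.
static std::vector<std::string> scan(const std::string &src) {
	std::vector<std::string> tokens;
	bool insert_semicolon = false; // true when the previous token may end a statement
	size_t i = 0;
	while (i < src.size()) {
		unsigned char c = (unsigned char)src[i];
		if (c == '\n') {
			if (insert_semicolon) {
				tokens.push_back(";"); // the newline terminates the statement
				insert_semicolon = false;
			}
			i++;
		} else if (std::isspace(c)) {
			i++; // other whitespace is always skipped
		} else if (std::isalnum(c)) {
			size_t start = i;
			while (i < src.size() && std::isalnum((unsigned char)src[i])) i++;
			tokens.push_back(src.substr(start, i - start));
			insert_semicolon = true; // identifiers and literals can end a statement
		} else {
			tokens.push_back(std::string(1, (char)c));
			// closing delimiters can end a statement; other punctuation cannot
			insert_semicolon = (c == ')' || c == ']' || c == '}');
			i++;
		}
	}
	return tokens;
}

int main() {
	for (const std::string &tok : scan("x := foo(1)\ny := 2\n")) {
		std::cout << tok << ' ';
	}
	std::cout << '\n';
	// Prints: x : = foo ( 1 ) ; y : = 2 ;
	return 0;
}

Fed "x := foo(1)\ny := 2\n", the sketch inserts a ";" at each newline that follows a statement-ending token, which is the behaviour the diff below gates behind the new TokenizerFlag_InsertSemicolon flag.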

@@ -527,6 +527,12 @@ struct TokenizerState {
u8 * read_curr; // pos from start
u8 * line; // current line pos
isize line_count;
bool insert_semicolon;
};
enum TokenizerFlags {
TokenizerFlag_None = 0,
TokenizerFlag_InsertSemicolon = 1<<0,
};
struct Tokenizer {
@@ -542,6 +548,9 @@ struct Tokenizer {
isize error_count;
Array<String> allocated_strings;
TokenizerFlags flags;
bool insert_semicolon;
};
@@ -552,15 +561,17 @@ TokenizerState save_tokenizer_state(Tokenizer *t) {
state.read_curr = t->read_curr;
state.line = t->line;
state.line_count = t->line_count;
state.insert_semicolon = t->insert_semicolon;
return state;
}
void restore_tokenizer_state(Tokenizer *t, TokenizerState *state) {
t->curr_rune = state->curr_rune;
t->curr = state->curr;
t->read_curr = state->read_curr;
t->line = state->line;
t->line_count = state->line_count;
t->insert_semicolon = state->insert_semicolon;
}
@@ -615,7 +626,7 @@ void advance_to_next_rune(Tokenizer *t) {
}
}
TokenizerInitError init_tokenizer(Tokenizer *t, String fullpath, TokenizerFlags flags = TokenizerFlag_None) {
TokenizerInitError err = TokenizerInit_None;
char *c_str = alloc_cstring(heap_allocator(), fullpath);
@@ -625,6 +636,7 @@ TokenizerInitError init_tokenizer(Tokenizer *t, String fullpath) {
gbFileContents fc = gb_file_read_contents(heap_allocator(), true, c_str);
gb_zero_item(t);
t->flags = flags;
t->fullpath = fullpath;
t->line_count = 1;
@@ -888,9 +900,13 @@ void tokenizer_get_token(Tokenizer *t, Token *token) {
// Skip whitespace
for (;;) {
switch (t->curr_rune) {
case '\n':
if (t->insert_semicolon) {
break;
}
/*fallthrough*/
case ' ':
case '\t':
case '\r':
advance_to_next_rune(t);
continue;
@@ -907,6 +923,8 @@ void tokenizer_get_token(Tokenizer *t, Token *token) {
token->pos.offset = t->curr - t->start;
token->pos.column = t->curr - t->line + 1;
bool insert_semicolon = false;
Rune curr_rune = t->curr_rune;
if (rune_is_letter(curr_rune)) {
token->kind = Token_Ident;
@@ -930,19 +948,51 @@ void tokenizer_get_token(Tokenizer *t, Token *token) {
}
}
}
switch (token->kind) {
case Token_Ident:
case Token_context:
case Token_typeid: // Dunno?
case Token_break:
case Token_continue:
case Token_fallthrough:
case Token_return:
insert_semicolon = true;
break;
}
if (t->flags & TokenizerFlag_InsertSemicolon) {
t->insert_semicolon = insert_semicolon;
}
return;
} else if (gb_is_between(curr_rune, '0', '9')) {
insert_semicolon = true;
scan_number_to_token(t, token, false);
} else {
advance_to_next_rune(t);
switch (curr_rune) {
case GB_RUNE_EOF:
token->kind = Token_EOF;
if (t->insert_semicolon) {
t->insert_semicolon = false; // EOF consumed
token->string = str_lit("\n");
token->kind = Token_Semicolon;
return;
}
break;
case '\n':
t->insert_semicolon = false;
token->string = str_lit("\n");
token->kind = Token_Semicolon;
return;
case '\'': // Rune Literal
{
insert_semicolon = true;
token->kind = Token_Rune;
Rune quote = curr_rune;
bool valid = true;
@@ -978,12 +1028,19 @@ void tokenizer_get_token(Tokenizer *t, Token *token) {
} else {
tokenizer_err(t, "Invalid rune literal");
}
if (t->flags & TokenizerFlag_InsertSemicolon) {
t->insert_semicolon = insert_semicolon;
}
return;
} break;
case '`': // Raw String Literal
case '"': // String Literal
{
insert_semicolon = true;
bool has_carriage_return = false;
i32 success;
Rune quote = curr_rune;
@@ -1028,6 +1085,11 @@ void tokenizer_get_token(Tokenizer *t, Token *token) {
} else {
tokenizer_err(t, "Invalid string literal");
}
if (t->flags & TokenizerFlag_InsertSemicolon) {
t->insert_semicolon = insert_semicolon;
}
return;
} break;
@@ -1048,17 +1110,32 @@ void tokenizer_get_token(Tokenizer *t, Token *token) {
case '@': token->kind = Token_At; break;
case '$': token->kind = Token_Dollar; break;
case '?':
	insert_semicolon = true;
	token->kind = Token_Question;
	break;
case '^':
	insert_semicolon = true;
	token->kind = Token_Pointer;
	break;
case ';': token->kind = Token_Semicolon; break;
case ',': token->kind = Token_Comma; break;
case ':': token->kind = Token_Colon; break;
case '(': token->kind = Token_OpenParen; break;
case ')':
	insert_semicolon = true;
	token->kind = Token_CloseParen;
	break;
case '[': token->kind = Token_OpenBracket; break;
case ']':
	insert_semicolon = true;
	token->kind = Token_CloseBracket;
	break;
case '{': token->kind = Token_OpenBrace; break;
case '}':
	insert_semicolon = true;
	token->kind = Token_CloseBrace;
	break;
case '\\': token->kind = Token_BackSlash; break;
case '%':
@@ -1131,10 +1208,12 @@ void tokenizer_get_token(Tokenizer *t, Token *token) {
case '#':
if (t->curr_rune == '!') {
insert_semicolon = t->insert_semicolon;
token->kind = Token_Comment;
while (t->curr_rune != '\n' && t->curr_rune != GB_RUNE_EOF) {
advance_to_next_rune(t);
}
} else {
token->kind = Token_Hash;
}
@@ -1144,6 +1223,7 @@ void tokenizer_get_token(Tokenizer *t, Token *token) {
case '/': {
token->kind = Token_Quo;
if (t->curr_rune == '/') {
insert_semicolon = t->insert_semicolon;
token->kind = Token_Comment;
while (t->curr_rune != '\n' && t->curr_rune != GB_RUNE_EOF) {
@@ -1255,11 +1335,18 @@ void tokenizer_get_token(Tokenizer *t, Token *token) {
int len = cast(int)gb_utf8_encode_rune(str, curr_rune);
tokenizer_err(t, "Illegal character: %.*s (%d) ", len, str, curr_rune);
}
insert_semicolon = t->insert_semicolon; // Preserve insert_semicolon info
token->kind = Token_Invalid;
break;
}
}
if (t->flags & TokenizerFlag_InsertSemicolon) {
t->insert_semicolon = insert_semicolon;
}
token->string.len = t->curr - token->string.text;
return;
}
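For context, a call site opting into the experimental behaviour might look like the sketch below. It only uses names that appear in this diff (init_tokenizer, TokenizerFlag_InsertSemicolon, tokenizer_get_token, Token_Semicolon); the file name and the surrounding loop are hypothetical, and the parser-side integration lives in the other changed files of this commit, which are not shown here.

// Hypothetical call site (sketch only), not taken from this commit:
Tokenizer t = {};
TokenizerInitError err = init_tokenizer(&t, str_lit("example.odin"),
                                        TokenizerFlag_InsertSemicolon);
if (err == TokenizerInit_None) {
	Token token = {};
	for (;;) {
		tokenizer_get_token(&t, &token);
		if (token.kind == Token_EOF) {
			break;
		}
		// Semicolons inserted by the tokenizer arrive as Token_Semicolon
		// tokens whose string is "\n".
	}
}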