Add experimental -insert-semicolon functionality to tokenizer and parser

This commit is contained in:
gingerBill
2020-11-01 15:10:06 +00:00
parent 75e8e5e06f
commit 54fbdabc38
11 changed files with 2253 additions and 42 deletions

View File

@@ -10,7 +10,7 @@ Float_Class :: enum {
Neg_Zero, // the negative zero
NaN, // Not-A-Number (NaN)
Inf, // positive infinity
Neg_Inf // negative infinity
Neg_Inf, // negative infinity
};
TAU :: 6.28318530717958647692528676655900576;

View File

@@ -935,7 +935,7 @@ Small_Allocator :: struct(BUFFER_SIZE: int)
start: uintptr,
curr: uintptr,
end: uintptr,
chunk_size: int
chunk_size: int,
}
small_allocator :: proc(s: ^$S/Small_Allocator, backing := context.allocator) -> (a: Allocator) {

View File

@@ -114,10 +114,10 @@ cleanpath_strip_prefix :: proc(buf: []u16) -> []u16 {
buf = buf[:N];
if len(buf) >= 4 {
if buf[0] == '\\'
&& buf[1] == '\\'
&& buf[2] == '?'
&& buf[3] == '\\' {
if buf[0] == '\\' &&
buf[1] == '\\' &&
buf[2] == '?' &&
buf[3] == '\\' {
buf = buf[4:];
}
}

View File

@@ -64,7 +64,7 @@ Type_Info_Boolean :: struct {};
Type_Info_Any :: struct {};
Type_Info_Type_Id :: struct {};
Type_Info_Pointer :: struct {
elem: ^Type_Info // nil -> rawptr
elem: ^Type_Info, // nil -> rawptr
};
Type_Info_Procedure :: struct {
params: ^Type_Info, // Type_Info_Tuple
@@ -296,7 +296,7 @@ Logger_Option :: enum {
Line,
Procedure,
Terminal_Color,
Thread_Id
Thread_Id,
}
Logger_Options :: bit_set[Logger_Option];

View File

@@ -183,8 +183,7 @@ foreign kernel32 {
lpOverlapped: LPOVERLAPPED,
) -> BOOL ---
CloseHandle :: proc(hObject: HANDLE) -> BOOL ---
MoveFileExW :: proc(lpExistingFileName: LPCWSTR, lpNewFileName: LPCWSTR, dwFlags: DWORD)
-> BOOL ---
MoveFileExW :: proc(lpExistingFileName: LPCWSTR, lpNewFileName: LPCWSTR, dwFlags: DWORD) -> BOOL ---
SetFilePointerEx :: proc(
hFile: HANDLE,
liDistanceToMove: LARGE_INTEGER,

View File

@@ -118,12 +118,12 @@ SYNCHRONIZE: DWORD : 0x00100000;
GENERIC_READ: DWORD : 0x80000000;
GENERIC_WRITE: DWORD : 0x40000000;
STANDARD_RIGHTS_WRITE: DWORD : READ_CONTROL;
FILE_GENERIC_WRITE: DWORD : STANDARD_RIGHTS_WRITE
| FILE_WRITE_DATA
| FILE_WRITE_ATTRIBUTES
| FILE_WRITE_EA
| FILE_APPEND_DATA
| SYNCHRONIZE;
FILE_GENERIC_WRITE: DWORD : STANDARD_RIGHTS_WRITE |
FILE_WRITE_DATA |
FILE_WRITE_ATTRIBUTES |
FILE_WRITE_EA |
FILE_APPEND_DATA |
SYNCHRONIZE;
FILE_FLAG_OPEN_REPARSE_POINT: DWORD : 0x00200000;
FILE_FLAG_BACKUP_SEMANTICS: DWORD : 0x02000000;
@@ -722,7 +722,7 @@ SYSTEM_INFO :: struct {
// https://docs.microsoft.com/en-us/windows-hardware/drivers/ddi/wdm/ns-wdm-_osversioninfoexw
OSVERSIONINFOEXW :: struct {
dwOSVersionInfoSize: ULONG,
dwOSVersionInfoSize: ULONG,
dwMajorVersion: ULONG,
dwMinorVersion: ULONG,
dwBuildNumber: ULONG,
@@ -733,4 +733,4 @@ OSVERSIONINFOEXW :: struct {
wSuiteMask: USHORT,
wProductType: UCHAR,
wReserved: UCHAR,
};
};

File diff suppressed because it is too large Load Diff

View File

@@ -157,6 +157,7 @@ struct BuildContext {
bool different_os;
bool keep_object_files;
bool disallow_do;
bool insert_semicolon;
bool use_llvm_api;

View File

@@ -587,6 +587,7 @@ enum BuildFlagKind {
BuildFlag_DisallowDo,
BuildFlag_DefaultToNilAllocator,
BuildFlag_InsertSemicolon,
BuildFlag_Compact,
BuildFlag_GlobalDefinitions,
@@ -687,6 +688,7 @@ bool parse_build_flags(Array<String> args) {
add_flag(&build_flags, BuildFlag_DisallowDo, str_lit("disallow-do"), BuildFlagParam_None);
add_flag(&build_flags, BuildFlag_DefaultToNilAllocator, str_lit("default-to-nil-allocator"), BuildFlagParam_None);
add_flag(&build_flags, BuildFlag_InsertSemicolon, str_lit("insert-semicolon"), BuildFlagParam_None);
add_flag(&build_flags, BuildFlag_Compact, str_lit("compact"), BuildFlagParam_None);
add_flag(&build_flags, BuildFlag_GlobalDefinitions, str_lit("global-definitions"), BuildFlagParam_None);
@@ -1127,6 +1129,10 @@ bool parse_build_flags(Array<String> args) {
build_context.ODIN_DEFAULT_TO_NIL_ALLOCATOR = true;
break;
case BuildFlag_InsertSemicolon:
build_context.insert_semicolon = true;
break;
case BuildFlag_Compact:
if (!build_context.query_data_set_settings.ok) {
gb_printf_err("Invalid use of -compact flag, only allowed with 'odin query'\n");

View File

@@ -1247,11 +1247,69 @@ bool peek_token_kind(AstFile *f, TokenKind kind) {
return false;
}
// Returns the next non-comment token after the current one, without
// advancing the parser. Returns a zero-value Token when no such token
// remains in f->tokens.
Token peek_token(AstFile *f) {
for (isize i = f->curr_token_index+1; i < f->tokens.count; i++) {
Token tok = f->tokens[i];
// Comments are transparent to parser lookahead.
if (tok.kind == Token_Comment) {
continue;
}
return tok;
}
return {};
}
// Reports whether `tok` is a semicolon that the tokenizer inserted for a
// newline: auto-inserted semicolons carry "\n" as their string, whereas
// explicit ones carry ";".
bool token_is_newline(Token const &tok) {
return tok.kind == Token_Semicolon && tok.string == "\n";
}
// Consumes the current token if it is an auto-inserted newline-semicolon.
// Returns true when a token was consumed. No-op (returns false) unless the
// tokenizer was created with TokenizerFlag_InsertSemicolon, since newline
// tokens are only produced in that mode.
bool skip_possible_newline(AstFile *f) {
if ((f->tokenizer.flags & TokenizerFlag_InsertSemicolon) == 0) {
return false;
}
Token *prev = &f->curr_token;
// Same condition as token_is_newline(*prev).
if (prev->kind == Token_Semicolon && prev->string == "\n") {
advance_token(f);
return true;
}
return false;
}
// Consumes an auto-inserted newline-semicolon only when the token after it
// could continue the current construct — an opening brace, `else`, or
// `where` — and that token begins no later than the very next line. This
// lets a literal/body continue after a line break without the inserted
// semicolon terminating the statement. Returns true when a token was
// consumed; no-op unless TokenizerFlag_InsertSemicolon is set.
bool skip_possible_newline_for_literal(AstFile *f) {
if ((f->tokenizer.flags & TokenizerFlag_InsertSemicolon) == 0) {
return false;
}
TokenPos curr_pos = f->curr_token.pos;
if (token_is_newline(f->curr_token)) {
Token next = peek_token(f);
// Only bridge a single line break: `next` must start on the line
// immediately after the newline token (or the same line).
if (curr_pos.line+1 >= next.pos.line) {
switch (next.kind) {
case Token_OpenBrace:
case Token_else:
case Token_where:
advance_token(f);
return true;
}
}
}
return false;
}
// Human-readable name of a token for use in error messages. Auto-inserted
// newline-semicolons are reported as "newline" rather than ";" so that
// diagnostics describe what the user actually wrote.
String token_to_string(Token const &tok) {
String p = token_strings[tok.kind];
if (token_is_newline(tok)) {
p = str_lit("newline");
}
return p;
}
Token expect_token(AstFile *f, TokenKind kind) {
Token prev = f->curr_token;
if (prev.kind != kind) {
String c = token_strings[kind];
String p = token_strings[prev.kind];
String p = token_to_string(prev);
syntax_error(f->curr_token, "Expected '%.*s', got '%.*s'", LIT(c), LIT(p));
if (prev.kind == Token_EOF) {
gb_exit(1);
@@ -1265,7 +1323,7 @@ Token expect_token(AstFile *f, TokenKind kind) {
Token expect_token_after(AstFile *f, TokenKind kind, char const *msg) {
Token prev = f->curr_token;
if (prev.kind != kind) {
String p = token_strings[prev.kind];
String p = token_to_string(prev);
syntax_error(f->curr_token, "Expected '%.*s' after %s, got '%.*s'",
LIT(token_strings[kind]),
msg,
@@ -1296,11 +1354,13 @@ Token expect_operator(AstFile *f) {
} else if (prev.kind == Token_if || prev.kind == Token_when) {
// okay
} else if (!gb_is_between(prev.kind, Token__OperatorBegin+1, Token__OperatorEnd-1)) {
String p = token_to_string(prev);
syntax_error(f->curr_token, "Expected an operator, got '%.*s'",
LIT(token_strings[prev.kind]));
LIT(p));
} else if (!f->allow_range && is_token_range(prev)) {
String p = token_to_string(prev);
syntax_error(f->curr_token, "Expected an non-range operator, got '%.*s'",
LIT(token_strings[prev.kind]));
LIT(p));
}
advance_token(f);
return prev;
@@ -1309,8 +1369,9 @@ Token expect_operator(AstFile *f) {
Token expect_keyword(AstFile *f) {
Token prev = f->curr_token;
if (!gb_is_between(prev.kind, Token__KeywordBegin+1, Token__KeywordEnd-1)) {
String p = token_to_string(prev);
syntax_error(f->curr_token, "Expected a keyword, got '%.*s'",
LIT(token_strings[prev.kind]));
LIT(p));
}
advance_token(f);
return prev;
@@ -1470,7 +1531,22 @@ void expect_semicolon(AstFile *f, Ast *s) {
if (s != nullptr) {
if (prev_token.pos.line != f->curr_token.pos.line) {
bool insert_semi = (f->tokenizer.flags & TokenizerFlag_InsertSemicolon) != 0;
if (insert_semi) {
switch (f->curr_token.kind) {
case Token_CloseBrace:
case Token_CloseParen:
case Token_else:
case Token_EOF:
return;
default:
if (is_semicolon_optional_for_node(f, s)) {
return;
}
break;
}
} else if (prev_token.pos.line != f->curr_token.pos.line) {
if (is_semicolon_optional_for_node(f, s)) {
return;
}
@@ -1488,14 +1564,16 @@ void expect_semicolon(AstFile *f, Ast *s) {
}
}
String node_string = ast_strings[s->kind];
String p = token_to_string(f->curr_token);
syntax_error(prev_token, "Expected ';' after %.*s, got %.*s",
LIT(node_string), LIT(token_strings[f->curr_token.kind]));
LIT(node_string), LIT(p));
} else {
switch (f->curr_token.kind) {
case Token_EOF:
return;
}
syntax_error(prev_token, "Expected ';'");
String p = token_to_string(f->curr_token);
syntax_error(prev_token, "Expected ';', got %.*s", LIT(p));
}
fix_advance_to_next_stmt(f);
}
@@ -1990,6 +2068,7 @@ Ast *parse_operand(AstFile *f, bool lhs) {
Token where_token = {};
Array<Ast *> where_clauses = {};
u64 tags = 0;
skip_possible_newline_for_literal(f);
if (f->curr_token.kind == Token_where) {
where_token = expect_token(f, Token_where);
@@ -2166,6 +2245,8 @@ Ast *parse_operand(AstFile *f, bool lhs) {
Token where_token = {};
Array<Ast *> where_clauses = {};
skip_possible_newline_for_literal(f);
if (f->curr_token.kind == Token_where) {
where_token = expect_token(f, Token_where);
isize prev_level = f->expr_level;
@@ -2237,6 +2318,7 @@ Ast *parse_operand(AstFile *f, bool lhs) {
syntax_error(f->curr_token, "#maybe and #no_nil cannot be applied together");
}
skip_possible_newline_for_literal(f);
Token where_token = {};
Array<Ast *> where_clauses = {};
@@ -3361,7 +3443,8 @@ bool parse_expect_field_separator(AstFile *f, Ast *param) {
return true;
}
if (token.kind == Token_Semicolon) {
syntax_error(f->curr_token, "Expected a comma, got a semicolon");
String p = token_to_string(token);
syntax_error(f->curr_token, "Expected a comma, got a %.*s", LIT(p));
advance_token(f);
return true;
}
@@ -3684,6 +3767,7 @@ Ast *parse_if_stmt(AstFile *f) {
}
} else {
body = parse_block_stmt(f, false);
skip_possible_newline_for_literal(f);
}
if (allow_token(f, Token_else)) {
@@ -3739,6 +3823,7 @@ Ast *parse_when_stmt(AstFile *f) {
}
} else {
body = parse_block_stmt(f, true);
skip_possible_newline_for_literal(f);
}
if (allow_token(f, Token_else)) {
@@ -3844,6 +3929,7 @@ Ast *parse_for_stmt(AstFile *f) {
}
} else {
body = parse_block_stmt(f, false);
skip_possible_newline_for_literal(f);
}
return ast_range_stmt(f, token, nullptr, nullptr, in_token, rhs, body);
}
@@ -3879,6 +3965,7 @@ Ast *parse_for_stmt(AstFile *f) {
}
} else {
body = parse_block_stmt(f, false);
skip_possible_newline_for_literal(f);
}
if (is_range) {
@@ -4160,6 +4247,8 @@ Ast *parse_attribute(AstFile *f, Token token, TokenKind open_kind, TokenKind clo
}
Ast *attribute = ast_attribute(f, token, open, close, elems);
skip_possible_newline(f);
Ast *decl = parse_stmt(f);
if (decl->kind == Ast_ValueDecl) {
array_add(&decl->ValueDecl.attributes, attribute);
@@ -4228,6 +4317,7 @@ Ast *parse_stmt(AstFile *f) {
}
} else {
body = parse_block_stmt(f, false);
skip_possible_newline_for_literal(f);
}
if (bad_stmt) {
return ast_bad_stmt(f, inline_token, f->curr_token);
@@ -4432,7 +4522,11 @@ ParseFileError init_ast_file(AstFile *f, String fullpath, TokenPos *err_pos) {
if (!string_ends_with(f->fullpath, str_lit(".odin"))) {
return ParseFile_WrongExtension;
}
TokenizerInitError err = init_tokenizer(&f->tokenizer, f->fullpath);
TokenizerFlags tokenizer_flags = TokenizerFlag_None;
if (build_context.insert_semicolon) {
tokenizer_flags = TokenizerFlag_InsertSemicolon;
}
TokenizerInitError err = init_tokenizer(&f->tokenizer, f->fullpath, tokenizer_flags);
if (err != TokenizerInit_None) {
switch (err) {
case TokenizerInit_Empty:

View File

@@ -527,6 +527,12 @@ struct TokenizerState {
u8 * read_curr; // pos from start
u8 * line; // current line pos
isize line_count;
bool insert_semicolon;
};
enum TokenizerFlags {
TokenizerFlag_None = 0,
TokenizerFlag_InsertSemicolon = 1<<0,
};
struct Tokenizer {
@@ -542,6 +548,9 @@ struct Tokenizer {
isize error_count;
Array<String> allocated_strings;
TokenizerFlags flags;
bool insert_semicolon;
};
@@ -552,15 +561,17 @@ TokenizerState save_tokenizer_state(Tokenizer *t) {
state.read_curr = t->read_curr;
state.line = t->line;
state.line_count = t->line_count;
state.insert_semicolon = t->insert_semicolon;
return state;
}
void restore_tokenizer_state(Tokenizer *t, TokenizerState *state) {
t->curr_rune = state->curr_rune;
t->curr = state->curr;
t->read_curr = state->read_curr;
t->line = state->line;
t->line_count = state->line_count;
t->curr_rune = state->curr_rune;
t->curr = state->curr;
t->read_curr = state->read_curr;
t->line = state->line;
t->line_count = state->line_count;
t->insert_semicolon = state->insert_semicolon;
}
@@ -615,7 +626,7 @@ void advance_to_next_rune(Tokenizer *t) {
}
}
TokenizerInitError init_tokenizer(Tokenizer *t, String fullpath) {
TokenizerInitError init_tokenizer(Tokenizer *t, String fullpath, TokenizerFlags flags = TokenizerFlag_None) {
TokenizerInitError err = TokenizerInit_None;
char *c_str = alloc_cstring(heap_allocator(), fullpath);
@@ -625,6 +636,7 @@ TokenizerInitError init_tokenizer(Tokenizer *t, String fullpath) {
gbFileContents fc = gb_file_read_contents(heap_allocator(), true, c_str);
gb_zero_item(t);
t->flags = flags;
t->fullpath = fullpath;
t->line_count = 1;
@@ -888,9 +900,13 @@ void tokenizer_get_token(Tokenizer *t, Token *token) {
// Skip whitespace
for (;;) {
switch (t->curr_rune) {
case '\n':
if (t->insert_semicolon) {
break;
}
/*fallthrough*/
case ' ':
case '\t':
case '\n':
case '\r':
advance_to_next_rune(t);
continue;
@@ -907,6 +923,8 @@ void tokenizer_get_token(Tokenizer *t, Token *token) {
token->pos.offset = t->curr - t->start;
token->pos.column = t->curr - t->line + 1;
bool insert_semicolon = false;
Rune curr_rune = t->curr_rune;
if (rune_is_letter(curr_rune)) {
token->kind = Token_Ident;
@@ -930,19 +948,51 @@ void tokenizer_get_token(Tokenizer *t, Token *token) {
}
}
}
switch (token->kind) {
case Token_Ident:
case Token_context:
case Token_typeid: // Dunno?
case Token_break:
case Token_continue:
case Token_fallthrough:
case Token_return:
insert_semicolon = true;
break;
}
if (t->flags & TokenizerFlag_InsertSemicolon) {
t->insert_semicolon = insert_semicolon;
}
return;
} else if (gb_is_between(curr_rune, '0', '9')) {
insert_semicolon = true;
scan_number_to_token(t, token, false);
} else {
advance_to_next_rune(t);
switch (curr_rune) {
case GB_RUNE_EOF:
token->kind = Token_EOF;
if (t->insert_semicolon) {
t->insert_semicolon = false; // EOF consumed
token->string = str_lit("\n");
token->kind = Token_Semicolon;
return;
}
break;
case '\n':
t->insert_semicolon = false;
token->string = str_lit("\n");
token->kind = Token_Semicolon;
return;
case '\'': // Rune Literal
{
insert_semicolon = true;
token->kind = Token_Rune;
Rune quote = curr_rune;
bool valid = true;
@@ -978,12 +1028,19 @@ void tokenizer_get_token(Tokenizer *t, Token *token) {
} else {
tokenizer_err(t, "Invalid rune literal");
}
if (t->flags & TokenizerFlag_InsertSemicolon) {
t->insert_semicolon = insert_semicolon;
}
return;
} break;
case '`': // Raw String Literal
case '"': // String Literal
{
insert_semicolon = true;
bool has_carriage_return = false;
i32 success;
Rune quote = curr_rune;
@@ -1028,6 +1085,11 @@ void tokenizer_get_token(Tokenizer *t, Token *token) {
} else {
tokenizer_err(t, "Invalid string literal");
}
if (t->flags & TokenizerFlag_InsertSemicolon) {
t->insert_semicolon = insert_semicolon;
}
return;
} break;
@@ -1048,17 +1110,32 @@ void tokenizer_get_token(Tokenizer *t, Token *token) {
case '@': token->kind = Token_At; break;
case '$': token->kind = Token_Dollar; break;
case '?': token->kind = Token_Question; break;
case '^': token->kind = Token_Pointer; break;
case '?':
insert_semicolon = true;
token->kind = Token_Question;
break;
case '^':
insert_semicolon = true;
token->kind = Token_Pointer;
break;
case ';': token->kind = Token_Semicolon; break;
case ',': token->kind = Token_Comma; break;
case ':': token->kind = Token_Colon; break;
case '(': token->kind = Token_OpenParen; break;
case ')': token->kind = Token_CloseParen; break;
case '[': token->kind = Token_OpenBracket; break;
case ']': token->kind = Token_CloseBracket; break;
case ')':
insert_semicolon = true;
token->kind = Token_CloseParen;
break;
case '[': token->kind = Token_OpenBracket; break;
case ']':
insert_semicolon = true;
token->kind = Token_CloseBracket;
break;
case '{': token->kind = Token_OpenBrace; break;
case '}': token->kind = Token_CloseBrace; break;
case '}':
insert_semicolon = true;
token->kind = Token_CloseBrace;
break;
case '\\': token->kind = Token_BackSlash; break;
case '%':
@@ -1131,10 +1208,12 @@ void tokenizer_get_token(Tokenizer *t, Token *token) {
case '#':
if (t->curr_rune == '!') {
insert_semicolon = t->insert_semicolon;
token->kind = Token_Comment;
while (t->curr_rune != '\n' && t->curr_rune != GB_RUNE_EOF) {
advance_to_next_rune(t);
}
token->kind = Token_Comment;
} else {
token->kind = Token_Hash;
}
@@ -1144,6 +1223,7 @@ void tokenizer_get_token(Tokenizer *t, Token *token) {
case '/': {
token->kind = Token_Quo;
if (t->curr_rune == '/') {
insert_semicolon = t->insert_semicolon;
token->kind = Token_Comment;
while (t->curr_rune != '\n' && t->curr_rune != GB_RUNE_EOF) {
@@ -1255,11 +1335,18 @@ void tokenizer_get_token(Tokenizer *t, Token *token) {
int len = cast(int)gb_utf8_encode_rune(str, curr_rune);
tokenizer_err(t, "Illegal character: %.*s (%d) ", len, str, curr_rune);
}
insert_semicolon = t->insert_semicolon; // Preserve insert_semicolon info
token->kind = Token_Invalid;
break;
}
}
if (t->flags & TokenizerFlag_InsertSemicolon) {
t->insert_semicolon = insert_semicolon;
}
token->string.len = t->curr - token->string.text;
return;
}