diff --git a/src/array.cpp b/src/array.cpp
index ea8873908..3df688e2e 100644
--- a/src/array.cpp
+++ b/src/array.cpp
@@ -33,6 +33,7 @@ template <typename T> Array<T> array_make          (gbAllocator const &a, isize
 template <typename T> Array<T> array_make_from_ptr(T *data, isize count, isize capacity);
 template <typename T> void     array_free         (Array<T> *array);
 template <typename T> void     array_add          (Array<T> *array, T const &t);
+template <typename T> T *      array_add_and_get  (Array<T> *array);
 template <typename T> void     array_add_elems    (Array<T> *array, T const *elems, isize elem_count);
 template <typename T> T        array_pop          (Array<T> *array);
 template <typename T> void     array_clear        (Array<T> *array);
@@ -42,6 +43,7 @@ template <typename T> void     array_set_capacity (Array<T> *array, isize capac
 
 template <typename T> Array<T> array_slice           (Array<T> const &array, isize lo, isize hi);
+
 template <typename T> void     array_ordered_remove  (Array<T> *array, isize index);
 template <typename T> void     array_unordered_remove(Array<T> *array, isize index);
 
@@ -158,6 +160,18 @@ void array_add(Array<T> *array, T const &t) {
 	array->count++;
 }
 
+template <typename T>
+T *array_add_and_get(Array<T> *array) {
+	if (array->count < array->capacity) {
+		return &array->data[array->count++];
+	}
+	if (array->capacity < array->count+1) {
+		array__grow(array, 0);
+	}
+	return &array->data[array->count++];
+}
+
+
 template <typename T>
 void array_add_elems(Array<T> *array, T const *elems, isize elem_count) {
 	GB_ASSERT(elem_count >= 0);
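Note on array_add_and_get: unlike array_add, it copies nothing in — it reserves one slot (growing the backing storage if needed) and returns a pointer to uninitialized memory for the caller to fill in place. A minimal usage sketch, assuming the Array<T> API above and a Token type; the tokenizer change later in this patch is the intended caller:

	Array<Token> tokens = {};
	array_init(&tokens, heap_allocator(), 0, 16);
	Token *slot = array_add_and_get(&tokens); // slot is uninitialized; count is already incremented
	slot->kind = Token_EOF;                   // construct directly in the array's storage
	array_free(&tokens);
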
diff --git a/src/big_int.cpp b/src/big_int.cpp
index 1db9eafa7..5985f7eaf 100644
--- a/src/big_int.cpp
+++ b/src/big_int.cpp
@@ -64,7 +64,7 @@ void big_int_dealloc(BigInt *dst) {
 	if (dst->len > 1) {
 		gb_free(big_int_allocator(), dst->d.words);
 	}
-	gb_zero_item(dst);
+	zero_item(dst);
 }
 
 BigInt big_int_make(BigInt const *b, bool abs=false);
diff --git a/src/checker.cpp b/src/checker.cpp
index a5885705e..a08f04945 100644
--- a/src/checker.cpp
+++ b/src/checker.cpp
@@ -531,7 +531,7 @@ bool check_vet_shadowing(Checker *c, Entity *e, VettedEntity *ve) {
 		}
 	}
 
-	gb_zero_item(ve);
+	zero_item(ve);
 	ve->kind = VettedEntity_Shadowed;
 	ve->entity = e;
 	ve->other = shadowed;
@@ -547,7 +547,7 @@ bool check_vet_unused(Checker *c, Entity *e, VettedEntity *ve) {
 	}
 	case Entity_ImportName:
 	case Entity_LibraryName:
-		gb_zero_item(ve);
+		zero_item(ve);
 		ve->kind = VettedEntity_Unused;
 		ve->entity = e;
 		return true;
diff --git a/src/common.cpp b/src/common.cpp
index 7068eb333..0f058d8f3 100644
--- a/src/common.cpp
+++ b/src/common.cpp
@@ -74,7 +74,7 @@ GB_ALLOCATOR_PROC(heap_allocator_proc) {
 	case gbAllocation_Alloc:
 		ptr = _aligned_malloc(size, alignment);
 		if (flags & gbAllocatorFlag_ClearToZero) {
-			gb_zero_size(ptr, size);
+			zero_size(ptr, size);
 		}
 		break;
 	case gbAllocation_Free:
@@ -105,7 +105,7 @@ GB_ALLOCATOR_PROC(heap_allocator_proc) {
 		// ptr = malloc(size+alignment);
 
 		if (flags & gbAllocatorFlag_ClearToZero) {
-			gb_zero_size(ptr, size);
+			zero_size(ptr, size);
 		}
 		break;
 	}
@@ -126,7 +126,7 @@ GB_ALLOCATOR_PROC(heap_allocator_proc) {
 		posix_memalign(&ptr, alignment, size);
 
 		if (flags & gbAllocatorFlag_ClearToZero) {
-			gb_zero_size(ptr, size);
+			zero_size(ptr, size);
 		}
 		break;
 	}
@@ -347,6 +347,12 @@ void mul_overflow_u64(u64 x, u64 y, u64 *lo, u64 *hi) {
 #endif
 }
 
+gb_inline void zero_size(void *ptr, isize len) {
+	memset(ptr, 0, len);
+}
+
+#define zero_item(ptr) zero_size((ptr), gb_size_of(*(ptr)))
+
 
 gb_global String global_module_path = {0};
 
@@ -376,27 +382,27 @@ typedef struct Arena {
 void arena_init(Arena *arena, gbAllocator backing, isize block_size=ARENA_DEFAULT_BLOCK_SIZE) {
 	arena->backing = backing;
 	arena->block_size = block_size;
-	array_init(&arena->blocks, backing);
+	array_init(&arena->blocks, backing, 0, 2);
 	gb_mutex_init(&arena->mutex);
 }
 
 void arena_grow(Arena *arena, isize min_size) {
-	gb_mutex_lock(&arena->mutex);
-	defer (gb_mutex_unlock(&arena->mutex));
+	// gb_mutex_lock(&arena->mutex);
+	// defer (gb_mutex_unlock(&arena->mutex));
 	isize size = gb_max(arena->block_size, min_size);
 	size = ALIGN_UP(size, ARENA_MIN_ALIGNMENT);
 	void *new_ptr = gb_alloc(arena->backing, size);
 	arena->ptr = cast(u8 *)new_ptr;
-	// gb_zero_size(arena->ptr, size); // NOTE(bill): This should already be zeroed
+	// zero_size(arena->ptr, size); // NOTE(bill): This should already be zeroed
 	GB_ASSERT(arena->ptr == ALIGN_DOWN_PTR(arena->ptr, ARENA_MIN_ALIGNMENT));
 	arena->end = arena->ptr + size;
 	array_add(&arena->blocks, arena->ptr);
 }
 
 void *arena_alloc(Arena *arena, isize size, isize alignment) {
-	gb_mutex_lock(&arena->mutex);
-	defer (gb_mutex_unlock(&arena->mutex));
+	// gb_mutex_lock(&arena->mutex);
+	// defer (gb_mutex_unlock(&arena->mutex));
 
 	arena->total_used += size;
 
@@ -411,13 +417,13 @@ void *arena_alloc(Arena *arena, isize size, isize alignment) {
 	arena->ptr = cast(u8 *)ALIGN_UP_PTR(arena->ptr + size, align);
 	GB_ASSERT(arena->ptr <= arena->end);
 	GB_ASSERT(ptr == ALIGN_DOWN_PTR(ptr, align));
-	gb_zero_size(ptr, size);
+	// zero_size(ptr, size);
 	return ptr;
 }
 
 void arena_free_all(Arena *arena) {
-	gb_mutex_lock(&arena->mutex);
-	defer (gb_mutex_unlock(&arena->mutex));
+	// gb_mutex_lock(&arena->mutex);
+	// defer (gb_mutex_unlock(&arena->mutex));
 
 	for_array(i, arena->blocks) {
 		gb_free(arena->backing, arena->blocks[i]);
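Two details in common.cpp are worth flagging. zero_item must measure the pointee, hence gb_size_of(*(ptr)): gb_size_of(ptr) would memset only pointer-sized bytes of a BigInt, VettedEntity, or Type. And the arena mutexes are commented out rather than removed, which assumes each Arena is only ever touched from one thread — the per-file arenas introduced in parser.hpp below. A sketch of the macro's expansion, assuming the definitions above:

	BigInt x;
	zero_item(&x);                    // zero_size(&x, gb_size_of(*(&x))) -> zeroes sizeof(BigInt) bytes
	// zero_size(&x, gb_size_of(&x)); // would zero only sizeof(BigInt *) bytes
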
diff --git a/src/main.cpp b/src/main.cpp
index 0e3d5836d..30000961c 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -1167,10 +1167,14 @@ void show_timings(Checker *c, Timings *t) {
 	isize files    = 0;
 	isize packages = p->packages.count;
 	isize total_file_size = 0;
+	f64 total_tokenizing_time = 0;
+	f64 total_parsing_time = 0;
 	for_array(i, p->packages) {
 		files += p->packages[i]->files.count;
 		for_array(j, p->packages[i]->files) {
 			AstFile *file = p->packages[i]->files[j];
+			total_tokenizing_time += file->time_to_tokenize;
+			total_parsing_time += file->time_to_parse;
 			total_file_size += file->tokenizer.end - file->tokenizer.start;
 		}
 	}
@@ -1186,6 +1190,32 @@ void show_timings(Checker *c, Timings *t) {
 		gb_printf("Total File Size - %td\n", total_file_size);
 		gb_printf("\n");
 	}
+	{
+		f64 time = total_tokenizing_time;
+		gb_printf("Tokenization Only\n");
+		gb_printf("LOC/s    - %.3f\n", cast(f64)lines/time);
+		gb_printf("us/LOC   - %.3f\n", 1.0e6*time/cast(f64)lines);
+		gb_printf("Tokens/s - %.3f\n", cast(f64)tokens/time);
+		gb_printf("us/Token - %.3f\n", 1.0e6*time/cast(f64)tokens);
+		gb_printf("bytes/s  - %.3f\n", cast(f64)total_file_size/time);
+		gb_printf("MiB/s    - %.3f\n", cast(f64)(total_file_size/time)/(1024*1024));
+		gb_printf("us/bytes - %.3f\n", 1.0e6*time/cast(f64)total_file_size);
+
+		gb_printf("\n");
+	}
+	{
+		f64 time = total_parsing_time;
+		gb_printf("Parsing Only\n");
+		gb_printf("LOC/s    - %.3f\n", cast(f64)lines/time);
+		gb_printf("us/LOC   - %.3f\n", 1.0e6*time/cast(f64)lines);
+		gb_printf("Tokens/s - %.3f\n", cast(f64)tokens/time);
+		gb_printf("us/Token - %.3f\n", 1.0e6*time/cast(f64)tokens);
+		gb_printf("bytes/s  - %.3f\n", cast(f64)total_file_size/time);
+		gb_printf("MiB/s    - %.3f\n", cast(f64)(total_file_size/time)/(1024*1024));
+		gb_printf("us/bytes - %.3f\n", 1.0e6*time/cast(f64)total_file_size);
+
+		gb_printf("\n");
+	}
 	{
 		TimeStamp ts = {};
 		for_array(i, t->sections) {
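The two new stat blocks are straight unit conversions of one accumulated f64 total per phase. A sketch of the arithmetic with made-up totals (illustrative values only, not measurements):

	f64   time            = 0.25;          // hypothetical total_tokenizing_time, in seconds
	isize lines           = 500000;
	isize total_file_size = 16*1024*1024;
	gb_printf("LOC/s - %.3f\n", cast(f64)lines/time);                         // 2000000.000
	gb_printf("MiB/s - %.3f\n", (cast(f64)total_file_size/time)/(1024*1024)); // 64.000
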
diff --git a/src/murmurhash3.cpp b/src/murmurhash3.cpp
index ff51cf733..7465fac18 100644
--- a/src/murmurhash3.cpp
+++ b/src/murmurhash3.cpp
@@ -223,3 +223,36 @@ void MurmurHash3_x86_128(void const *key, isize len, u32 seed, void *out) {
 // }
 
 
+
+
+gb_internal gb_inline u32 murmur_32_scramble(u32 k) {
+	k *= 0xcc9e2d51;
+	k = (k << 15) | (k >> 17);
+	k *= 0x1b873593;
+	return k;
+}
+
+u32 murmur3_32(u8 const *key, isize len, u32 seed) {
+	u32 h = seed;
+	u32 k;
+	for (size_t i = len >> 2; i; i--) {
+		memcpy(&k, key, sizeof(u32));
+		key += sizeof(u32);
+		h ^= murmur_32_scramble(k);
+		h = (h << 13) | (h >> 19);
+		h = h * 5 + 0xe6546b64;
+	}
+	k = 0;
+	for (size_t i = len & 3; i; i--) {
+		k <<= 8;
+		k |= key[i - 1];
+	}
+	h ^= murmur_32_scramble(k);
+	h ^= len;
+	h ^= h >> 16;
+	h *= 0x85ebca6b;
+	h ^= h >> 13;
+	h *= 0xc2b2ae35;
+	h ^= h >> 16;
+	return h;
+}
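murmur3_32 follows the public-domain 32-bit MurmurHash3 layout: whole u32 blocks are scrambled and mixed, the trailing 1-3 bytes are folded in highest-byte-first, and because murmur_32_scramble(0) == 0 the unconditional tail scramble is a no-op when len is a multiple of 4. In this patch it is only referenced by the commented-out line in keyword_hash (tokenizer.cpp below). A usage sketch — the 0x6f64696e seed ("odin" in ASCII) is taken from that commented-out call, and no particular output value is assumed:

	String s = str_lit("proc");
	u32 hash  = murmur3_32(s.text, s.len, 0x6f64696e);
	u32 index = hash & KEYWORD_HASH_TABLE_MASK; // same slot selection as the keyword table
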
diff --git a/src/parser.cpp b/src/parser.cpp
index 5bdadfd9a..143644f70 100644
--- a/src/parser.cpp
+++ b/src/parser.cpp
@@ -111,7 +111,7 @@ Ast *clone_ast(Ast *node);
 Array<Ast *> clone_ast_array(Array<Ast *> array) {
 	Array<Ast *> result = {};
 	if (array.count > 0) {
-		result = array_make<Ast *>(ast_allocator(), array.count);
+		result = array_make<Ast *>(ast_allocator(nullptr), array.count);
 		for_array(i, array) {
 			result[i] = clone_ast(array[i]);
 		}
@@ -461,7 +461,7 @@ gb_global gbAtomic64 total_subtype_node_memory_test = {0};
 
 // NOTE(bill): And this below is why is I/we need a new language! Discriminated unions are a pain in C/C++
 Ast *alloc_ast_node(AstFile *f, AstKind kind) {
-	gbAllocator a = ast_allocator();
+	gbAllocator a = ast_allocator(f);
 	gb_atomic64_fetch_add(&total_allocated_node_memory,    cast(i64)(gb_size_of(Ast)));
 	gb_atomic64_fetch_add(&total_subtype_node_memory_test, cast(i64)(gb_size_of(AstCommonStuff) + ast_variant_sizes[kind]));
@@ -1161,7 +1161,7 @@ CommentGroup *consume_comment_group(AstFile *f, isize n, isize *end_line_) {
 
 	CommentGroup *comments = nullptr;
 	if (list.count > 0) {
-		comments = gb_alloc_item(ast_allocator(), CommentGroup);
+		comments = gb_alloc_item(heap_allocator(), CommentGroup);
 		comments->list = list;
 		array_add(&f->comments, comments);
 	}
@@ -1194,12 +1194,15 @@ void comsume_comment_groups(AstFile *f, Token prev) {
 
 Token advance_token(AstFile *f) {
-	gb_zero_item(&f->lead_comment);
-	gb_zero_item(&f->line_comment);
+	f->lead_comment = nullptr;
+	f->line_comment = nullptr;
+
 	Token prev = f->prev_token = f->curr_token;
 
 	bool ok = next_token0(f);
-	if (ok) comsume_comment_groups(f, prev);
+	if (ok && f->curr_token.kind == Token_Comment) {
+		comsume_comment_groups(f, prev);
+	}
 	return prev;
 }
 
@@ -4303,24 +4306,37 @@ ParseFileError init_ast_file(AstFile *f, String fullpath, TokenPos *err_pos) {
 		return ParseFile_None;
 	}
 
+	u64 start = time_stamp_time_now();
+
 	while (f->curr_token.kind != Token_EOF) {
-		Token token = tokenizer_get_token(&f->tokenizer);
-		if (token.kind == Token_Invalid) {
-			err_pos->line   = token.pos.line;
-			err_pos->column = token.pos.column;
+		Token *token = array_add_and_get(&f->tokens);
+		tokenizer_get_token(&f->tokenizer, token);
+		if (token->kind == Token_Invalid) {
+			err_pos->line   = token->pos.line;
+			err_pos->column = token->pos.column;
 			return ParseFile_InvalidToken;
 		}
-		array_add(&f->tokens, token);
 
-		if (token.kind == Token_EOF) {
+		if (token->kind == Token_EOF) {
 			break;
 		}
 	}
 
+	u64 end = time_stamp_time_now();
+	f->time_to_tokenize = cast(f64)(end-start)/cast(f64)time_stamp__freq();
+
 	f->curr_token_index = 0;
 	f->prev_token = f->tokens[f->curr_token_index];
 	f->curr_token = f->tokens[f->curr_token_index];
 
+	isize const page_size = 4*1024;
+	isize block_size = 2*f->tokens.count*gb_size_of(Ast);
+	block_size = ((block_size + page_size-1)/page_size) * page_size;
+	block_size = gb_clamp(block_size, page_size, ARENA_DEFAULT_BLOCK_SIZE);
+
+	arena_init(&f->arena, heap_allocator(), block_size);
+
 	array_init(&f->comments, heap_allocator(), 0, 0);
 	array_init(&f->imports,  heap_allocator(), 0, 0);
@@ -4843,9 +4859,13 @@ bool parse_file(Parser *p, AstFile *f) {
 		return true;
 	}
 
+	u64 start = time_stamp_time_now();
+
 	String filepath = f->tokenizer.fullpath;
 	String base_dir = dir_from_path(filepath);
-	comsume_comment_groups(f, f->prev_token);
+	if (f->curr_token.kind == Token_Comment) {
+		comsume_comment_groups(f, f->prev_token);
+	}
 
 	CommentGroup *docs = f->lead_comment;
@@ -4886,27 +4906,29 @@ bool parse_file(Parser *p, AstFile *f) {
 	expect_semicolon(f, pd);
 	f->pkg_decl = pd;
 
-	if (f->error_count > 0) {
-		return false;
-	}
+	if (f->error_count == 0) {
+		f->decls = array_make<Ast *>(heap_allocator());
 
-	f->decls = array_make<Ast *>(heap_allocator());
-
-	while (f->curr_token.kind != Token_EOF) {
-		Ast *stmt = parse_stmt(f);
-		if (stmt && stmt->kind != Ast_EmptyStmt) {
-			array_add(&f->decls, stmt);
-			if (stmt->kind == Ast_ExprStmt &&
-			    stmt->ExprStmt.expr != nullptr &&
-			    stmt->ExprStmt.expr->kind == Ast_ProcLit) {
-				syntax_error(stmt, "Procedure literal evaluated but not used");
+		while (f->curr_token.kind != Token_EOF) {
+			Ast *stmt = parse_stmt(f);
+			if (stmt && stmt->kind != Ast_EmptyStmt) {
+				array_add(&f->decls, stmt);
+				if (stmt->kind == Ast_ExprStmt &&
+				    stmt->ExprStmt.expr != nullptr &&
+				    stmt->ExprStmt.expr->kind == Ast_ProcLit) {
+					syntax_error(stmt, "Procedure literal evaluated but not used");
+				}
 			}
 		}
+
+		parse_setup_file_decls(p, f, base_dir, f->decls);
 	}
 
-	parse_setup_file_decls(p, f, base_dir, f->decls);
+	u64 end = time_stamp_time_now();
+	f->time_to_parse = cast(f64)(end-start)/cast(f64)time_stamp__freq();
 
-	return true;
+
+	return f->error_count == 0;
 }
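Because init_ast_file now tokenizes the whole file before any Ast node is allocated, f->tokens.count is known up front and is used to size the file's arena: roughly two Ast-sized slots per token, rounded up to whole 4 KiB pages, then clamped. A worked instance of that arithmetic, assuming gb_size_of(Ast) == 128 purely for illustration:

	isize const page_size = 4*1024;
	isize block_size = 2*10000*128;                                  // 10000 tokens -> 2560000 bytes
	block_size = ((block_size + page_size-1)/page_size) * page_size; // 625 pages, already aligned here
	block_size = gb_clamp(block_size, page_size, ARENA_DEFAULT_BLOCK_SIZE);
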
diff --git a/src/parser.hpp b/src/parser.hpp
index 9cf8ebbf2..a98060df9 100644
--- a/src/parser.hpp
+++ b/src/parser.hpp
@@ -77,6 +77,8 @@ struct AstFile {
 	AstPackage * pkg;
 	Scope *      scope;
 
+	Arena        arena;
+
 	Ast *        pkg_decl;
 	String       fullpath;
 	Tokenizer    tokenizer;
@@ -102,6 +104,8 @@ struct AstFile {
 	Ast *        curr_proc;
 	isize        error_count;
 
+	f64          time_to_tokenize; // seconds
+	f64          time_to_parse;    // seconds
 
 	CommentGroup *lead_comment; // Comment (block) before the decl
 	CommentGroup *line_comment; // Comment after the semicolon
@@ -644,8 +648,9 @@ gb_inline bool is_ast_when_stmt(Ast *node) {
 
 gb_global Arena global_ast_arena = {};
 
-gbAllocator ast_allocator(void) {
-	Arena *arena = &global_ast_arena;
+gbAllocator ast_allocator(AstFile *f) {
+	Arena *arena = f ? &f->arena : &global_ast_arena;
+	// Arena *arena = &global_ast_arena;
 	return arena_allocator(arena);
 }
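ast_allocator(f) now selects the file-local arena whenever a file is supplied, falling back to global_ast_arena only for f == nullptr (as in clone_ast_array above). Together with the commented-out arena mutexes in common.cpp, this assumes at most one thread parses a given AstFile at a time. A sketch of the two call shapes, both taken from this patch:

	Ast *node = gb_alloc_item(ast_allocator(f), Ast);             // per-file arena, lock-free
	Array<Ast *> dup = array_make<Ast *>(ast_allocator(nullptr),  // global-arena fallback
	                                     array.count);
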
diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp
index a7205f664..6075bb9a5 100644
--- a/src/tokenizer.cpp
+++ b/src/tokenizer.cpp
@@ -146,11 +146,20 @@ enum {
 };
 gb_global KeywordHashEntry keyword_hash_table[KEYWORD_HASH_TABLE_COUNT] = {};
 GB_STATIC_ASSERT(Token__KeywordEnd-Token__KeywordBegin <= gb_count_of(keyword_hash_table));
+gb_global isize const min_keyword_size = 2;
+gb_global isize max_keyword_size = 11;
+gb_global bool keyword_indices[16] = {};
+
 
 gb_inline u32 keyword_hash(u8 const *text, isize len) {
 	return fnv32a(text, len);
+	// return murmur3_32(text, len, 0x6f64696e);
 }
 
 void add_keyword_hash_entry(String const &s, TokenKind kind) {
+	max_keyword_size = gb_max(max_keyword_size, s.len);
+
+	keyword_indices[s.len] = true;
+
 	u32 hash = keyword_hash(s.text, s.len);
 
 	// NOTE(bill): This is a bit of an empirical hack in order to speed things up
@@ -175,6 +184,8 @@ void init_keyword_hash_table(void) {
 	for (i32 i = 0; i < gb_count_of(legacy_keywords); i++) {
 		add_keyword_hash_entry(legacy_keywords[i].s, legacy_keywords[i].kind);
 	}
+
+	GB_ASSERT(max_keyword_size < 16);
 }
@@ -679,19 +690,18 @@ u8 peek_byte(Tokenizer *t, isize offset=0) {
 	return 0;
 }
 
-Token scan_number_to_token(Tokenizer *t, bool seen_decimal_point) {
-	Token token = {};
-	token.kind = Token_Integer;
-	token.string = {t->curr, 1};
-	token.pos.file = t->fullpath;
-	token.pos.line = t->line_count;
-	token.pos.column = t->curr-t->line+1;
+void scan_number_to_token(Tokenizer *t, Token *token, bool seen_decimal_point) {
+	token->kind = Token_Integer;
+	token->string = {t->curr, 1};
+	token->pos.file = t->fullpath;
+	token->pos.line = t->line_count;
+	token->pos.column = t->curr-t->line+1;
 
 	if (seen_decimal_point) {
-		token.string.text -= 1;
-		token.string.len  += 1;
-		token.pos.column  -= 1;
-		token.kind = Token_Float;
+		token->string.text -= 1;
+		token->string.len  += 1;
+		token->pos.column  -= 1;
+		token->kind = Token_Float;
 		scan_mantissa(t, 10);
 		goto exponent;
 	}
@@ -704,43 +714,43 @@ Token scan_number_to_token(Tokenizer *t, bool seen_decimal_point) {
 			advance_to_next_rune(t);
 			scan_mantissa(t, 2);
 			if (t->curr - prev <= 2) {
-				token.kind = Token_Invalid;
+				token->kind = Token_Invalid;
 			}
 			goto end;
 		case 'o': // Octal
 			advance_to_next_rune(t);
 			scan_mantissa(t, 8);
 			if (t->curr - prev <= 2) {
-				token.kind = Token_Invalid;
+				token->kind = Token_Invalid;
 			}
 			goto end;
 		case 'd': // Decimal
 			advance_to_next_rune(t);
 			scan_mantissa(t, 10);
 			if (t->curr - prev <= 2) {
-				token.kind = Token_Invalid;
+				token->kind = Token_Invalid;
 			}
 			goto end;
 		case 'z': // Dozenal
 			advance_to_next_rune(t);
 			scan_mantissa(t, 12);
 			if (t->curr - prev <= 2) {
-				token.kind = Token_Invalid;
+				token->kind = Token_Invalid;
 			}
 			goto end;
 		case 'x': // Hexadecimal
 			advance_to_next_rune(t);
 			scan_mantissa(t, 16);
 			if (t->curr - prev <= 2) {
-				token.kind = Token_Invalid;
+				token->kind = Token_Invalid;
 			}
 			goto end;
 		case 'h': // Hexadecimal Float
-			token.kind = Token_Float;
+			token->kind = Token_Float;
 			advance_to_next_rune(t);
 			scan_mantissa(t, 16);
 			if (t->curr - prev <= 2) {
-				token.kind = Token_Invalid;
+				token->kind = Token_Invalid;
 			} else {
 				u8 *start = prev+2;
 				isize n = t->curr - start;
@@ -777,13 +787,13 @@ fraction:
 		}
 		advance_to_next_rune(t);
-		token.kind = Token_Float;
+		token->kind = Token_Float;
 		scan_mantissa(t, 10);
 	}
 
 exponent:
 	if (t->curr_rune == 'e' || t->curr_rune == 'E') {
-		token.kind = Token_Float;
+		token->kind = Token_Float;
 		advance_to_next_rune(t);
 		if (t->curr_rune == '-' || t->curr_rune == '+') {
 			advance_to_next_rune(t);
 		}
@@ -793,14 +803,14 @@ exponent:
 
 	switch (t->curr_rune) {
 	case 'i': case 'j': case 'k':
-		token.kind = Token_Imag;
+		token->kind = Token_Imag;
 		advance_to_next_rune(t);
 		break;
 	}
 
 end:
-	token.string.len = t->curr - token.string.text;
-	return token;
+	token->string.len = t->curr - token->string.text;
+	return;
 }
 
 
@@ -870,59 +880,8 @@ bool scan_escape(Tokenizer *t) {
 	return true;
 }
 
-gb_inline TokenKind token_kind_variant2(Tokenizer *t, TokenKind a, TokenKind b) {
-	if (t->curr_rune == '=') {
-		advance_to_next_rune(t);
-		return b;
-	}
-	return a;
-}
-
-gb_inline TokenKind token_kind_variant3(Tokenizer *t, TokenKind a, TokenKind b, Rune ch_c, TokenKind c) {
-	if (t->curr_rune == '=') {
-		advance_to_next_rune(t);
-		return b;
-	}
-	if (t->curr_rune == ch_c) {
-		advance_to_next_rune(t);
-		return c;
-	}
-	return a;
-}
-
-gb_inline TokenKind token_kind_variant4(Tokenizer *t, TokenKind a, TokenKind b, Rune ch_c, TokenKind c, Rune ch_d, TokenKind d) {
-	if (t->curr_rune == '=') {
-		advance_to_next_rune(t);
-		return b;
-	} else if (t->curr_rune == ch_c) {
-		advance_to_next_rune(t);
-		return c;
-	} else if (t->curr_rune == ch_d) {
-		advance_to_next_rune(t);
-		return d;
-	}
-	return a;
-}
-
-
-gb_inline TokenKind token_kind_dub_eq(Tokenizer *t, Rune sing_rune, TokenKind sing, TokenKind sing_eq, TokenKind dub, TokenKind dub_eq) {
-	if (t->curr_rune == '=') {
-		advance_to_next_rune(t);
-		return sing_eq;
-	} else if (t->curr_rune == sing_rune) {
-		advance_to_next_rune(t);
-		if (t->curr_rune == '=') {
-			advance_to_next_rune(t);
-			return dub_eq;
-		}
-		return dub;
-	}
-	return sing;
-}
-
-
-Token tokenizer_get_token(Tokenizer *t) {
+void tokenizer_get_token(Tokenizer *t, Token *token) {
 	// Skip whitespace
 	for (;;) {
 		switch (t->curr_rune) {
@@ -936,49 +895,49 @@ Token tokenizer_get_token(Tokenizer *t) {
 		break;
 	}
 
-	Token token = {};
-	token.string.text = t->curr;
-	token.string.len = 1;
-	token.pos.file.text = t->fullpath.text;
-	token.pos.file.len = t->fullpath.len;
-	token.pos.line = t->line_count;
-	token.pos.offset = t->curr - t->start;
-	token.pos.column = t->curr - t->line + 1;
+	token->kind = Token_Invalid;
+	token->string.text = t->curr;
+	token->string.len = 1;
+	token->pos.file.text = t->fullpath.text;
+	token->pos.file.len = t->fullpath.len;
+	token->pos.line = t->line_count;
+	token->pos.offset = t->curr - t->start;
+	token->pos.column = t->curr - t->line + 1;
 
 	Rune curr_rune = t->curr_rune;
 	if (rune_is_letter(curr_rune)) {
-		token.kind = Token_Ident;
+		token->kind = Token_Ident;
 		while (rune_is_letter_or_digit(t->curr_rune)) {
 			advance_to_next_rune(t);
 		}
-		token.string.len = t->curr - token.string.text;
+		token->string.len = t->curr - token->string.text;
 
-		// NOTE(bill): All keywords are > 1
-		if (token.string.len > 1) {
-			u32 hash = keyword_hash(token.string.text, token.string.len);
+		// NOTE(bill): Heavily optimize to make it faster to find keywords
+		if (1 < token->string.len && token->string.len <= max_keyword_size && keyword_indices[token->string.len]) {
+			u32 hash = keyword_hash(token->string.text, token->string.len);
 			u32 index = hash & KEYWORD_HASH_TABLE_MASK;
 			KeywordHashEntry *entry = &keyword_hash_table[index];
-			if (entry->kind != Token_Invalid) {
+			if (entry->kind != Token_Invalid && entry->hash == hash) {
 				String const &entry_text = token_strings[entry->kind];
-				if (str_eq(entry_text, token.string)) {
-					token.kind = entry->kind;
+				if (str_eq(entry_text, token->string)) {
+					token->kind = entry->kind;
 				}
 			}
 		}
 	} else if (gb_is_between(curr_rune, '0', '9')) {
-		token = scan_number_to_token(t, false);
+		scan_number_to_token(t, token, false);
 	} else {
 		advance_to_next_rune(t);
 		switch (curr_rune) {
 		case GB_RUNE_EOF:
-			token.kind = Token_EOF;
+			token->kind = Token_EOF;
 			break;
 
 		case '\'': // Rune Literal
 		{
-			token.kind = Token_Rune;
+			token->kind = Token_Rune;
 			Rune quote = curr_rune;
 			bool valid = true;
 			i32 n = 0, success;
@@ -1004,16 +963,16 @@ Token tokenizer_get_token(Tokenizer *t) {
 			if (valid && n != 1) {
 				tokenizer_err(t, "Invalid rune literal");
 			}
-			token.string.len = t->curr - token.string.text;
-			success = unquote_string(heap_allocator(), &token.string);
+			token->string.len = t->curr - token->string.text;
+			success = unquote_string(heap_allocator(), &token->string, 0);
 			if (success > 0) {
 				if (success == 2) {
-					array_add(&t->allocated_strings, token.string);
+					array_add(&t->allocated_strings, token->string);
 				}
-				return token;
 			} else {
 				tokenizer_err(t, "Invalid rune literal");
 			}
+			return;
 		} break;
 
 		case '`': // Raw String Literal
@@ -1022,7 +981,7 @@ Token tokenizer_get_token(Tokenizer *t) {
 			bool has_carriage_return = false;
 			i32 success;
 			Rune quote = curr_rune;
-			token.kind = Token_String;
+			token->kind = Token_String;
 			if (curr_rune == '"') {
 				for (;;) {
 					Rune r = t->curr_rune;
@@ -1054,82 +1013,118 @@ Token tokenizer_get_token(Tokenizer *t) {
 				}
 			}
-			token.string.len = t->curr - token.string.text;
-			success = unquote_string(heap_allocator(), &token.string, 0, has_carriage_return);
+			token->string.len = t->curr - token->string.text;
+			success = unquote_string(heap_allocator(), &token->string, 0, has_carriage_return);
 			if (success > 0) {
 				if (success == 2) {
-					array_add(&t->allocated_strings, token.string);
+					array_add(&t->allocated_strings, token->string);
 				}
-				return token;
 			} else {
 				tokenizer_err(t, "Invalid string literal");
 			}
+			return;
 		} break;
 
 		case '.':
 			if (t->curr_rune == '.') {
 				advance_to_next_rune(t);
-				token.kind = Token_Ellipsis;
+				token->kind = Token_Ellipsis;
 				if (t->curr_rune == '<') {
 					advance_to_next_rune(t);
-					token.kind = Token_RangeHalf;
+					token->kind = Token_RangeHalf;
 				}
 			} else if ('0' <= t->curr_rune && t->curr_rune <= '9') {
-				token = scan_number_to_token(t, true);
+				scan_number_to_token(t, token, true);
 			} else {
-				token.kind = Token_Period;
+				token->kind = Token_Period;
 			}
 			break;
 
-		case '@':  token.kind = Token_At;           break;
-		case '$':  token.kind = Token_Dollar;       break;
-		case '?':  token.kind = Token_Question;     break;
-		case '^':  token.kind = Token_Pointer;      break;
-		case ';':  token.kind = Token_Semicolon;    break;
-		case ',':  token.kind = Token_Comma;        break;
-		case ':':  token.kind = Token_Colon;        break;
-		case '(':  token.kind = Token_OpenParen;    break;
-		case ')':  token.kind = Token_CloseParen;   break;
-		case '[':  token.kind = Token_OpenBracket;  break;
-		case ']':  token.kind = Token_CloseBracket; break;
-		case '{':  token.kind = Token_OpenBrace;    break;
-		case '}':  token.kind = Token_CloseBrace;   break;
-		case '\\': token.kind = Token_BackSlash;    break;
+		case '@':  token->kind = Token_At;           break;
+		case '$':  token->kind = Token_Dollar;       break;
+		case '?':  token->kind = Token_Question;     break;
+		case '^':  token->kind = Token_Pointer;      break;
+		case ';':  token->kind = Token_Semicolon;    break;
+		case ',':  token->kind = Token_Comma;        break;
+		case ':':  token->kind = Token_Colon;        break;
+		case '(':  token->kind = Token_OpenParen;    break;
+		case ')':  token->kind = Token_CloseParen;   break;
+		case '[':  token->kind = Token_OpenBracket;  break;
+		case ']':  token->kind = Token_CloseBracket; break;
+		case '{':  token->kind = Token_OpenBrace;    break;
+		case '}':  token->kind = Token_CloseBrace;   break;
+		case '\\': token->kind = Token_BackSlash;    break;

-		// case 0x2260: token.kind = Token_NotEq;  break; // '≠'
-		// case 0x2264: token.kind = Token_LtEq;   break; // '≤'
-		// case 0x2265: token.kind = Token_GtEq;   break; // '≥'
-		// case 0x2208: token.kind = Token_in;     break; // '∈'
-		// case 0x2209: token.kind = Token_not_in; break; // '∉'
+		// case 0x2260: token->kind = Token_NotEq;  break; // '≠'
+		// case 0x2264: token->kind = Token_LtEq;   break; // '≤'
+		// case 0x2265: token->kind = Token_GtEq;   break; // '≥'
+		// case 0x2208: token->kind = Token_in;     break; // '∈'
+		// case 0x2209: token->kind = Token_not_in; break; // '∉'
 
-		case '%': token.kind = token_kind_dub_eq(t, '%', Token_Mod, Token_ModEq, Token_ModMod, Token_ModModEq); break;
-
-		case '*': token.kind = token_kind_variant2(t, Token_Mul, Token_MulEq); break;
-		case '=':
-			token.kind = Token_Eq;
-			if (t->curr_rune == '>') {
+		case '%':
+			token->kind = Token_Mod;
+			if (t->curr_rune == '=') {
+				token->kind = Token_ModEq;
+				advance_to_next_rune(t);
+			} else if (t->curr_rune == '%') {
+				token->kind = Token_ModMod;
 				advance_to_next_rune(t);
-				token.kind = Token_DoubleArrowRight;
-			} else if (t->curr_rune == '=') {
-				advance_to_next_rune(t);
-				token.kind = Token_CmpEq;
+				if (t->curr_rune == '=') {
+					token->kind = Token_ModModEq;
+					advance_to_next_rune(t);
+				}
 			}
 			break;
-		case '~': token.kind = token_kind_variant2(t, Token_Xor, Token_XorEq); break;
-		case '!': token.kind = token_kind_variant2(t, Token_Not, Token_NotEq); break;
-		case '+': token.kind = token_kind_variant2(t, Token_Add, Token_AddEq); break;
-		case '-':
-			token.kind = Token_Sub;
+
+		case '*':
+			token->kind = Token_Mul;
 			if (t->curr_rune == '=') {
 				advance_to_next_rune(t);
-				token.kind = Token_SubEq;
+				token->kind = Token_MulEq;
+			}
+			break;
+		case '=':
+			token->kind = Token_Eq;
+			if (t->curr_rune == '>') {
+				advance_to_next_rune(t);
+				token->kind = Token_DoubleArrowRight;
+			} else if (t->curr_rune == '=') {
+				advance_to_next_rune(t);
+				token->kind = Token_CmpEq;
+			}
+			break;
+		case '~':
+			token->kind = Token_Xor;
+			if (t->curr_rune == '=') {
+				advance_to_next_rune(t);
+				token->kind = Token_XorEq;
+			}
+			break;
+		case '!':
+			token->kind = Token_Not;
+			if (t->curr_rune == '=') {
+				advance_to_next_rune(t);
+				token->kind = Token_NotEq;
+			}
+			break;
+		case '+':
+			token->kind = Token_Add;
+			if (t->curr_rune == '=') {
+				advance_to_next_rune(t);
+				token->kind = Token_AddEq;
+			}
+			break;
+		case '-':
+			token->kind = Token_Sub;
+			if (t->curr_rune == '=') {
+				advance_to_next_rune(t);
+				token->kind = Token_SubEq;
 			} else if (t->curr_rune == '-' && peek_byte(t) == '-') {
 				advance_to_next_rune(t);
 				advance_to_next_rune(t);
-				token.kind = Token_Undef;
+				token->kind = Token_Undef;
 			} else if (t->curr_rune == '>') {
 				advance_to_next_rune(t);
-				token.kind = Token_ArrowRight;
+				token->kind = Token_ArrowRight;
 			}
 			break;
@@ -1138,20 +1133,24 @@ Token tokenizer_get_token(Tokenizer *t) {
 			while (t->curr_rune != '\n' && t->curr_rune != GB_RUNE_EOF) {
 				advance_to_next_rune(t);
 			}
-			token.kind = Token_Comment;
+			token->kind = Token_Comment;
 		} else {
-			token.kind = Token_Hash;
+			token->kind = Token_Hash;
 		}
 		break;
 
 		case '/':
 		{
+			token->kind = Token_Quo;
 			if (t->curr_rune == '/') {
+				token->kind = Token_Comment;
+
 				while (t->curr_rune != '\n' && t->curr_rune != GB_RUNE_EOF) {
 					advance_to_next_rune(t);
 				}
-				token.kind = Token_Comment;
 			} else if (t->curr_rune == '*') {
+				token->kind = Token_Comment;
+
 				isize comment_scope = 1;
 				advance_to_next_rune(t);
 				while (comment_scope > 0) {
@@ -1173,37 +1172,81 @@ Token tokenizer_get_token(Tokenizer *t) {
 					advance_to_next_rune(t);
 				}
 			}
-			token.kind = Token_Comment;
-		} else {
-			token.kind = token_kind_variant2(t, Token_Quo, Token_QuoEq);
+		} else if (t->curr_rune == '=') {
+			advance_to_next_rune(t);
+			token->kind = Token_QuoEq;
 			}
 		}
 		break;
 
 		case '<':
+			token->kind = Token_Lt;
 			if (t->curr_rune == '-') {
 				advance_to_next_rune(t);
-				token.kind = Token_ArrowLeft;
-			} else {
-				token.kind = token_kind_dub_eq(t, '<', Token_Lt, Token_LtEq, Token_Shl, Token_ShlEq);
-			}
-			break;
-		case '>': token.kind = token_kind_dub_eq(t, '>', Token_Gt, Token_GtEq, Token_Shr, Token_ShrEq); break;
-
-		case '&':
-			token.kind = Token_And;
-			if (t->curr_rune == '~') {
-				token.kind = Token_AndNot;
+				token->kind = Token_ArrowLeft;
+			} else if (t->curr_rune == '=') {
+				token->kind = Token_LtEq;
+				advance_to_next_rune(t);
+			} else if (t->curr_rune == '<') {
+				token->kind = Token_Shl;
 				advance_to_next_rune(t);
 				if (t->curr_rune == '=') {
-					token.kind = Token_AndNotEq;
+					token->kind = Token_ShlEq;
 					advance_to_next_rune(t);
 				}
-			} else {
-				token.kind = token_kind_dub_eq(t, '&', Token_And, Token_AndEq, Token_CmpAnd, Token_CmpAndEq);
 			}
 			break;
-		case '|': token.kind = token_kind_dub_eq(t, '|', Token_Or, Token_OrEq, Token_CmpOr, Token_CmpOrEq); break;
+
+		case '>':
+			token->kind = Token_Gt;
+			if (t->curr_rune == '=') {
+				token->kind = Token_GtEq;
+				advance_to_next_rune(t);
+			} else if (t->curr_rune == '>') {
+				token->kind = Token_Shr;
+				advance_to_next_rune(t);
+				if (t->curr_rune == '=') {
+					token->kind = Token_ShrEq;
+					advance_to_next_rune(t);
+				}
+			}
+			break;
+
+		case '&':
+			token->kind = Token_And;
+			if (t->curr_rune == '~') {
+				token->kind = Token_AndNot;
+				advance_to_next_rune(t);
+				if (t->curr_rune == '=') {
+					token->kind = Token_AndNotEq;
+					advance_to_next_rune(t);
+				}
+			} else if (t->curr_rune == '=') {
+				token->kind = Token_AndEq;
+				advance_to_next_rune(t);
+			} else if (t->curr_rune == '&') {
+				token->kind = Token_CmpAnd;
+				advance_to_next_rune(t);
+				if (t->curr_rune == '=') {
+					token->kind = Token_CmpAndEq;
+					advance_to_next_rune(t);
+				}
+			}
+			break;
+
+		case '|':
+			token->kind = Token_Or;
+			if (t->curr_rune == '=') {
+				token->kind = Token_OrEq;
+				advance_to_next_rune(t);
+			} else if (t->curr_rune == '|') {
+				token->kind = Token_CmpOr;
+				advance_to_next_rune(t);
+				if (t->curr_rune == '=') {
+					token->kind = Token_CmpOrEq;
+					advance_to_next_rune(t);
+				}
+			}
+			break;
 
 		default:
 			if (curr_rune != GB_RUNE_BOM) {
@@ -1211,11 +1254,11 @@ Token tokenizer_get_token(Tokenizer *t) {
 				int len = cast(int)gb_utf8_encode_rune(str, curr_rune);
 				tokenizer_err(t, "Illegal character: %.*s (%d) ", len, str, curr_rune);
 			}
-			token.kind = Token_Invalid;
+			token->kind = Token_Invalid;
 			break;
 		}
 	}
 
-	token.string.len = t->curr - token.string.text;
-	return token;
+	token->string.len = t->curr - token->string.text;
+	return;
 }
diff --git a/src/types.cpp b/src/types.cpp
index 274ad2ced..3a0613b2c 100644
--- a/src/types.cpp
+++ b/src/types.cpp
@@ -770,7 +770,7 @@ void set_base_type(Type *t, Type *base) {
 Type *alloc_type(TypeKind kind) {
 	gbAllocator a = heap_allocator();
 	Type *t = gb_alloc_item(a, Type);
-	gb_zero_item(t);
+	zero_item(t);
 	t->kind = kind;
 	t->cached_size  = -1;
 	t->cached_align = -1;
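The keyword lookup in tokenizer.cpp is the heart of the speed-up: an identifier is rejected as a keyword candidate by the length bounds and the keyword_indices table before anything is hashed, and the new entry->hash == hash test skips the string comparison on table collisions. The same three-stage filter as a standalone sketch, assuming the globals and the KeywordHashEntry layout implied above:

	gb_inline bool lookup_keyword(String s, TokenKind *kind_) {
		if (s.len <= 1 || s.len > max_keyword_size) return false; // no keyword this short or long
		if (!keyword_indices[s.len])                return false; // no keyword of this exact length
		u32 hash = keyword_hash(s.text, s.len);
		KeywordHashEntry *entry = &keyword_hash_table[hash & KEYWORD_HASH_TABLE_MASK];
		if (entry->kind == Token_Invalid || entry->hash != hash) return false; // empty slot or collision
		if (!str_eq(token_strings[entry->kind], s))              return false; // hash matched, text did not
		*kind_ = entry->kind;
		return true;
	}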