Improve performance of tokenization and parsing

gingerBill
2020-05-27 18:23:37 +01:00
parent 876820789e
commit 1a0614b0d7
10 changed files with 367 additions and 214 deletions

View File

@@ -33,6 +33,7 @@ template <typename T> Array<T> array_make (gbAllocator const &a, isize
template <typename T> Array<T> array_make_from_ptr (T *data, isize count, isize capacity);
template <typename T> void array_free (Array<T> *array);
template <typename T> void array_add (Array<T> *array, T const &t);
+template <typename T> T * array_add_and_get (Array<T> *array);
template <typename T> void array_add_elems (Array<T> *array, T const *elems, isize elem_count);
template <typename T> T array_pop (Array<T> *array);
template <typename T> void array_clear (Array<T> *array);
@@ -42,6 +43,7 @@ template <typename T> void array_set_capacity (Array<T> *array, isize capac
template <typename T> Array<T> array_slice (Array<T> const &array, isize lo, isize hi);
template <typename T> void array_ordered_remove (Array<T> *array, isize index);
template <typename T> void array_unordered_remove(Array<T> *array, isize index);
@@ -158,6 +160,18 @@ void array_add(Array<T> *array, T const &t) {
array->count++;
}
+template <typename T>
+T *array_add_and_get(Array<T> *array) {
+    if (array->count < array->capacity) {
+        return &array->data[array->count++];
+    }
+    if (array->capacity < array->count+1) {
+        array__grow(array, 0);
+    }
+    return &array->data[array->count++];
+}
template <typename T>
void array_add_elems(Array<T> *array, T const *elems, isize elem_count) {
GB_ASSERT(elem_count >= 0);
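
The new array_add_and_get helper grows the array if needed and then hands back a pointer to the freshly reserved slot, so callers can construct the element in place instead of building a temporary and copying it in. A minimal usage sketch, mirroring how parser.cpp below fills its token array with tokenizer_get_token:

    Token *token = array_add_and_get(&f->tokens); // reserve a slot; no Token copy
    tokenizer_get_token(&f->tokenizer, token);    // write the token directly into the array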

View File

@@ -64,7 +64,7 @@ void big_int_dealloc(BigInt *dst) {
if (dst->len > 1) {
gb_free(big_int_allocator(), dst->d.words);
}
-gb_zero_item(dst);
+zero_item(dst);
}
BigInt big_int_make(BigInt const *b, bool abs=false);

View File

@@ -531,7 +531,7 @@ bool check_vet_shadowing(Checker *c, Entity *e, VettedEntity *ve) {
}
}
-gb_zero_item(ve);
+zero_item(ve);
ve->kind = VettedEntity_Shadowed;
ve->entity = e;
ve->other = shadowed;
@@ -547,7 +547,7 @@ bool check_vet_unused(Checker *c, Entity *e, VettedEntity *ve) {
}
case Entity_ImportName:
case Entity_LibraryName:
-gb_zero_item(ve);
+zero_item(ve);
ve->kind = VettedEntity_Unused;
ve->entity = e;
return true;

View File

@@ -74,7 +74,7 @@ GB_ALLOCATOR_PROC(heap_allocator_proc) {
case gbAllocation_Alloc:
ptr = _aligned_malloc(size, alignment);
if (flags & gbAllocatorFlag_ClearToZero) {
-gb_zero_size(ptr, size);
+zero_size(ptr, size);
}
break;
case gbAllocation_Free:
@@ -105,7 +105,7 @@ GB_ALLOCATOR_PROC(heap_allocator_proc) {
// ptr = malloc(size+alignment);
if (flags & gbAllocatorFlag_ClearToZero) {
-gb_zero_size(ptr, size);
+zero_size(ptr, size);
}
break;
}
@@ -126,7 +126,7 @@ GB_ALLOCATOR_PROC(heap_allocator_proc) {
posix_memalign(&ptr, alignment, size);
if (flags & gbAllocatorFlag_ClearToZero) {
-gb_zero_size(ptr, size);
+zero_size(ptr, size);
}
break;
}
@@ -347,6 +347,12 @@ void mul_overflow_u64(u64 x, u64 y, u64 *lo, u64 *hi) {
#endif
}
+gb_inline void zero_size(void *ptr, isize len) {
+    memset(ptr, 0, len);
+}
+#define zero_item(ptr) zero_size((ptr), gb_size_of(ptr))
gb_global String global_module_path = {0};
@@ -376,27 +382,27 @@ typedef struct Arena {
void arena_init(Arena *arena, gbAllocator backing, isize block_size=ARENA_DEFAULT_BLOCK_SIZE) {
arena->backing = backing;
arena->block_size = block_size;
-array_init(&arena->blocks, backing);
+array_init(&arena->blocks, backing, 0, 2);
gb_mutex_init(&arena->mutex);
}
void arena_grow(Arena *arena, isize min_size) {
-gb_mutex_lock(&arena->mutex);
-defer (gb_mutex_unlock(&arena->mutex));
+// gb_mutex_lock(&arena->mutex);
+// defer (gb_mutex_unlock(&arena->mutex));
isize size = gb_max(arena->block_size, min_size);
size = ALIGN_UP(size, ARENA_MIN_ALIGNMENT);
void *new_ptr = gb_alloc(arena->backing, size);
arena->ptr = cast(u8 *)new_ptr;
-// gb_zero_size(arena->ptr, size); // NOTE(bill): This should already be zeroed
+// zero_size(arena->ptr, size); // NOTE(bill): This should already be zeroed
GB_ASSERT(arena->ptr == ALIGN_DOWN_PTR(arena->ptr, ARENA_MIN_ALIGNMENT));
arena->end = arena->ptr + size;
array_add(&arena->blocks, arena->ptr);
}
void *arena_alloc(Arena *arena, isize size, isize alignment) {
-gb_mutex_lock(&arena->mutex);
-defer (gb_mutex_unlock(&arena->mutex));
+// gb_mutex_lock(&arena->mutex);
+// defer (gb_mutex_unlock(&arena->mutex));
arena->total_used += size;
@@ -411,13 +417,13 @@ void *arena_alloc(Arena *arena, isize size, isize alignment) {
arena->ptr = cast(u8 *)ALIGN_UP_PTR(arena->ptr + size, align);
GB_ASSERT(arena->ptr <= arena->end);
GB_ASSERT(ptr == ALIGN_DOWN_PTR(ptr, align));
-gb_zero_size(ptr, size);
+// zero_size(ptr, size);
return ptr;
}
void arena_free_all(Arena *arena) {
-gb_mutex_lock(&arena->mutex);
-defer (gb_mutex_unlock(&arena->mutex));
+// gb_mutex_lock(&arena->mutex);
+// defer (gb_mutex_unlock(&arena->mutex));
for_array(i, arena->blocks) {
gb_free(arena->backing, arena->blocks[i]);
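
With Ast allocation moving to a per-file arena (see parser.hpp below), each arena is only ever touched by the one thread parsing that file, which is why the lock/unlock pairs above could be commented out and the redundant zeroing dropped (the NOTE in the diff records that fresh blocks are already zeroed). A sketch of the resulting lock-free bump-allocation fast path, assuming the field names from the diff; arena_alloc_sketch is a hypothetical name for illustration:

    void *arena_alloc_sketch(Arena *arena, isize size, isize alignment) {
        arena->total_used += size;
        if (arena->ptr + size > arena->end) {
            arena_grow(arena, size); // grab a fresh block from the backing allocator
        }
        u8 *ptr = cast(u8 *)ALIGN_UP_PTR(arena->ptr, alignment);
        arena->ptr = cast(u8 *)ALIGN_UP_PTR(arena->ptr + size, alignment);
        return ptr; // no mutex and no memset on this path
    }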

View File

@@ -1167,10 +1167,14 @@ void show_timings(Checker *c, Timings *t) {
isize files = 0;
isize packages = p->packages.count;
isize total_file_size = 0;
+f64 total_tokenizing_time = 0;
+f64 total_parsing_time = 0;
for_array(i, p->packages) {
files += p->packages[i]->files.count;
for_array(j, p->packages[i]->files) {
AstFile *file = p->packages[i]->files[j];
+total_tokenizing_time += file->time_to_tokenize;
+total_parsing_time += file->time_to_parse;
total_file_size += file->tokenizer.end - file->tokenizer.start;
}
}
@@ -1186,6 +1190,32 @@ void show_timings(Checker *c, Timings *t) {
gb_printf("Total File Size - %td\n", total_file_size);
gb_printf("\n");
}
+{
+    f64 time = total_tokenizing_time;
+    gb_printf("Tokenization Only\n");
+    gb_printf("LOC/s - %.3f\n", cast(f64)lines/time);
+    gb_printf("us/LOC - %.3f\n", 1.0e6*time/cast(f64)lines);
+    gb_printf("Tokens/s - %.3f\n", cast(f64)tokens/time);
+    gb_printf("us/Token - %.3f\n", 1.0e6*time/cast(f64)tokens);
+    gb_printf("bytes/s - %.3f\n", cast(f64)total_file_size/time);
+    gb_printf("MiB/s - %.3f\n", cast(f64)(total_file_size/time)/(1024*1024));
+    gb_printf("us/bytes - %.3f\n", 1.0e6*time/cast(f64)total_file_size);
+    gb_printf("\n");
+}
+{
+    f64 time = total_parsing_time;
+    gb_printf("Parsing Only\n");
+    gb_printf("LOC/s - %.3f\n", cast(f64)lines/time);
+    gb_printf("us/LOC - %.3f\n", 1.0e6*time/cast(f64)lines);
+    gb_printf("Tokens/s - %.3f\n", cast(f64)tokens/time);
+    gb_printf("us/Token - %.3f\n", 1.0e6*time/cast(f64)tokens);
+    gb_printf("bytes/s - %.3f\n", cast(f64)total_file_size/time);
+    gb_printf("MiB/s - %.3f\n", cast(f64)(total_file_size/time)/(1024*1024));
+    gb_printf("us/bytes - %.3f\n", 1.0e6*time/cast(f64)total_file_size);
+    gb_printf("\n");
+}
{
TimeStamp ts = {};
for_array(i, t->sections) {
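
As a worked example of the arithmetic above (illustrative figures, not measurements from this commit): tokenizing 1,000,000 lines totalling 20 MiB (20,971,520 bytes) in 0.1 s would print LOC/s = 1000000/0.1 = 10000000.000, us/LOC = 1.0e6*0.1/1000000 = 0.100, bytes/s = 20971520/0.1 = 209715200.000, and MiB/s = (20971520/0.1)/(1024*1024) = 200.000.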

View File

@@ -223,3 +223,36 @@ void MurmurHash3_x86_128(void const *key, isize len, u32 seed, void *out) {
// }
+gb_internal gb_inline u32 murmur_32_scramble(u32 k) {
+    k *= 0xcc9e2d51;
+    k = (k << 15) | (k >> 17);
+    k *= 0x1b873593;
+    return k;
+}
+
+u32 murmur3_32(u8 const *key, isize len, u32 seed) {
+    u32 h = seed;
+    u32 k;
+    for (size_t i = len >> 2; i; i--) {
+        memcpy(&k, key, sizeof(u32));
+        key += sizeof(u32);
+        h ^= murmur_32_scramble(k);
+        h = (h << 13) | (h >> 19);
+        h = h * 5 + 0xe6546b64;
+    }
+    k = 0;
+    for (size_t i = len & 3; i; i--) {
+        k <<= 8;
+        k |= key[i - 1];
+    }
+    h ^= murmur_32_scramble(k);
+    h ^= len;
+    h ^= h >> 16;
+    h *= 0x85ebca6b;
+    h ^= h >> 13;
+    h *= 0xc2b2ae35;
+    h ^= h >> 16;
+    return h;
}
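
murmur3_32 is added here as an alternative keyword hash; the tokenizer below keeps fnv32a as the default and leaves the murmur call commented out. A call would look like this (the seed is taken from that commented-out line; 0x6f64696e spells "odin" in ASCII):

    u32 hash = murmur3_32(s.text, s.len, 0x6f64696e);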

View File

@@ -111,7 +111,7 @@ Ast *clone_ast(Ast *node);
Array<Ast *> clone_ast_array(Array<Ast *> array) {
Array<Ast *> result = {};
if (array.count > 0) {
-result = array_make<Ast *>(ast_allocator(), array.count);
+result = array_make<Ast *>(ast_allocator(nullptr), array.count);
for_array(i, array) {
result[i] = clone_ast(array[i]);
}
@@ -461,7 +461,7 @@ gb_global gbAtomic64 total_subtype_node_memory_test = {0};
// NOTE(bill): And this below is why is I/we need a new language! Discriminated unions are a pain in C/C++
Ast *alloc_ast_node(AstFile *f, AstKind kind) {
-gbAllocator a = ast_allocator();
+gbAllocator a = ast_allocator(f);
gb_atomic64_fetch_add(&total_allocated_node_memory, cast(i64)(gb_size_of(Ast)));
gb_atomic64_fetch_add(&total_subtype_node_memory_test, cast(i64)(gb_size_of(AstCommonStuff) + ast_variant_sizes[kind]));
@@ -1161,7 +1161,7 @@ CommentGroup *consume_comment_group(AstFile *f, isize n, isize *end_line_) {
CommentGroup *comments = nullptr;
if (list.count > 0) {
-comments = gb_alloc_item(ast_allocator(), CommentGroup);
+comments = gb_alloc_item(heap_allocator(), CommentGroup);
comments->list = list;
array_add(&f->comments, comments);
}
@@ -1194,12 +1194,15 @@ void comsume_comment_groups(AstFile *f, Token prev) {
Token advance_token(AstFile *f) {
-gb_zero_item(&f->lead_comment);
-gb_zero_item(&f->line_comment);
+f->lead_comment = nullptr;
+f->line_comment = nullptr;
Token prev = f->prev_token = f->curr_token;
bool ok = next_token0(f);
-if (ok) comsume_comment_groups(f, prev);
+if (ok && f->curr_token.kind == Token_Comment) {
+    comsume_comment_groups(f, prev);
+}
return prev;
}
@@ -4303,24 +4306,37 @@ ParseFileError init_ast_file(AstFile *f, String fullpath, TokenPos *err_pos) {
return ParseFile_None;
}
+u64 start = time_stamp_time_now();
while (f->curr_token.kind != Token_EOF) {
-    Token token = tokenizer_get_token(&f->tokenizer);
-    if (token.kind == Token_Invalid) {
-        err_pos->line = token.pos.line;
-        err_pos->column = token.pos.column;
+    Token *token = array_add_and_get(&f->tokens);
+    tokenizer_get_token(&f->tokenizer, token);
+    if (token->kind == Token_Invalid) {
+        err_pos->line = token->pos.line;
+        err_pos->column = token->pos.column;
         return ParseFile_InvalidToken;
     }
-    array_add(&f->tokens, token);
-    if (token.kind == Token_EOF) {
+    if (token->kind == Token_EOF) {
break;
}
}
+u64 end = time_stamp_time_now();
+f->time_to_tokenize = cast(f64)(end-start)/cast(f64)time_stamp__freq();
f->curr_token_index = 0;
f->prev_token = f->tokens[f->curr_token_index];
f->curr_token = f->tokens[f->curr_token_index];
+isize const page_size = 4*1024;
+isize block_size = 2*f->tokens.count*gb_size_of(Ast);
+block_size = ((block_size + page_size-1)/page_size) * page_size;
+block_size = gb_clamp(block_size, page_size, ARENA_DEFAULT_BLOCK_SIZE);
+arena_init(&f->arena, heap_allocator(), block_size);
array_init(&f->comments, heap_allocator(), 0, 0);
array_init(&f->imports, heap_allocator(), 0, 0);
@@ -4843,9 +4859,13 @@ bool parse_file(Parser *p, AstFile *f) {
return true;
}
+u64 start = time_stamp_time_now();
String filepath = f->tokenizer.fullpath;
String base_dir = dir_from_path(filepath);
-comsume_comment_groups(f, f->prev_token);
+if (f->curr_token.kind == Token_Comment) {
+    comsume_comment_groups(f, f->prev_token);
+}
CommentGroup *docs = f->lead_comment;
@@ -4886,27 +4906,29 @@ bool parse_file(Parser *p, AstFile *f) {
expect_semicolon(f, pd);
f->pkg_decl = pd;
-    if (f->error_count > 0) {
-        return false;
-    }
-    f->decls = array_make<Ast *>(heap_allocator());
-    while (f->curr_token.kind != Token_EOF) {
-        Ast *stmt = parse_stmt(f);
-        if (stmt && stmt->kind != Ast_EmptyStmt) {
-            array_add(&f->decls, stmt);
-            if (stmt->kind == Ast_ExprStmt &&
-                stmt->ExprStmt.expr != nullptr &&
-                stmt->ExprStmt.expr->kind == Ast_ProcLit) {
-                syntax_error(stmt, "Procedure literal evaluated but not used");
-            }
-        }
-    }
-    parse_setup_file_decls(p, f, base_dir, f->decls);
-    return true;
+    if (f->error_count == 0) {
+        f->decls = array_make<Ast *>(heap_allocator());
+        while (f->curr_token.kind != Token_EOF) {
+            Ast *stmt = parse_stmt(f);
+            if (stmt && stmt->kind != Ast_EmptyStmt) {
+                array_add(&f->decls, stmt);
+                if (stmt->kind == Ast_ExprStmt &&
+                    stmt->ExprStmt.expr != nullptr &&
+                    stmt->ExprStmt.expr->kind == Ast_ProcLit) {
+                    syntax_error(stmt, "Procedure literal evaluated but not used");
+                }
+            }
+        }
+        parse_setup_file_decls(p, f, base_dir, f->decls);
+    }
+    u64 end = time_stamp_time_now();
+    f->time_to_parse = cast(f64)(end-start)/cast(f64)time_stamp__freq();
+    return f->error_count == 0;
}
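
The arena sizing in init_ast_file above is worth making concrete: the file's complete token array is known before any Ast node is allocated, so the arena's first block is sized at roughly two Ast nodes per token, rounded up to whole 4 KiB pages and clamped to [page_size, ARENA_DEFAULT_BLOCK_SIZE]. A worked sketch with assumed numbers (10,000 tokens and a hypothetical gb_size_of(Ast) of 128 bytes):

    isize const page_size = 4*1024;
    isize block_size = 2*10000*128;                                  // 2,560,000 bytes
    block_size = ((block_size + page_size-1)/page_size) * page_size; // already page-aligned: 625 pages
    block_size = gb_clamp(block_size, page_size, ARENA_DEFAULT_BLOCK_SIZE);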

View File

@@ -77,6 +77,8 @@ struct AstFile {
AstPackage * pkg;
Scope * scope;
+Arena arena;
Ast * pkg_decl;
String fullpath;
Tokenizer tokenizer;
@@ -102,6 +104,8 @@ struct AstFile {
Ast * curr_proc;
isize error_count;
+f64 time_to_tokenize; // seconds
+f64 time_to_parse; // seconds
CommentGroup *lead_comment; // Comment (block) before the decl
CommentGroup *line_comment; // Comment after the semicolon
@@ -644,8 +648,9 @@ gb_inline bool is_ast_when_stmt(Ast *node) {
gb_global Arena global_ast_arena = {};
-gbAllocator ast_allocator(void) {
-    Arena *arena = &global_ast_arena;
+gbAllocator ast_allocator(AstFile *f) {
+    Arena *arena = f ? &f->arena : &global_ast_arena;
+    // Arena *arena = &global_ast_arena;
return arena_allocator(arena);
}
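
Threading an AstFile through ast_allocator is what makes the mutex removal in common.cpp safe: each file is parsed by a single thread, so nodes allocated from its own arena never race, and only the global fallback arena is shared. A small sketch of the two paths (both calls are from the diff above; the nullptr form is what clone_ast_array uses):

    gbAllocator per_file = ast_allocator(f);       // f->arena: single-threaded, lock-free
    gbAllocator fallback = ast_allocator(nullptr); // global_ast_arena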

View File

@@ -146,11 +146,20 @@ enum {
};
gb_global KeywordHashEntry keyword_hash_table[KEYWORD_HASH_TABLE_COUNT] = {};
GB_STATIC_ASSERT(Token__KeywordEnd-Token__KeywordBegin <= gb_count_of(keyword_hash_table));
+gb_global isize const min_keyword_size = 2;
+gb_global isize max_keyword_size = 11;
+gb_global bool keyword_indices[16] = {};
+gb_inline u32 keyword_hash(u8 const *text, isize len) {
+    return fnv32a(text, len);
+    // return murmur3_32(text, len, 0x6f64696e);
+}
void add_keyword_hash_entry(String const &s, TokenKind kind) {
+max_keyword_size = gb_max(max_keyword_size, s.len);
+keyword_indices[s.len] = true;
u32 hash = keyword_hash(s.text, s.len);
// NOTE(bill): This is a bit of an empirical hack in order to speed things up
@@ -175,6 +184,8 @@ void init_keyword_hash_table(void) {
for (i32 i = 0; i < gb_count_of(legacy_keywords); i++) {
add_keyword_hash_entry(legacy_keywords[i].s, legacy_keywords[i].kind);
}
+GB_ASSERT(max_keyword_size < 16);
}
@@ -679,19 +690,18 @@ u8 peek_byte(Tokenizer *t, isize offset=0) {
return 0;
}
-Token scan_number_to_token(Tokenizer *t, bool seen_decimal_point) {
-    Token token = {};
-    token.kind = Token_Integer;
-    token.string = {t->curr, 1};
-    token.pos.file = t->fullpath;
-    token.pos.line = t->line_count;
-    token.pos.column = t->curr-t->line+1;
+void scan_number_to_token(Tokenizer *t, Token *token, bool seen_decimal_point) {
+    token->kind = Token_Integer;
+    token->string = {t->curr, 1};
+    token->pos.file = t->fullpath;
+    token->pos.line = t->line_count;
+    token->pos.column = t->curr-t->line+1;
if (seen_decimal_point) {
-    token.string.text -= 1;
-    token.string.len += 1;
-    token.pos.column -= 1;
-    token.kind = Token_Float;
+    token->string.text -= 1;
+    token->string.len += 1;
+    token->pos.column -= 1;
+    token->kind = Token_Float;
scan_mantissa(t, 10);
goto exponent;
}
@@ -704,43 +714,43 @@ Token scan_number_to_token(Tokenizer *t, bool seen_decimal_point) {
advance_to_next_rune(t);
scan_mantissa(t, 2);
if (t->curr - prev <= 2) {
-token.kind = Token_Invalid;
+token->kind = Token_Invalid;
}
goto end;
case 'o': // Octal
advance_to_next_rune(t);
scan_mantissa(t, 8);
if (t->curr - prev <= 2) {
-token.kind = Token_Invalid;
+token->kind = Token_Invalid;
}
goto end;
case 'd': // Decimal
advance_to_next_rune(t);
scan_mantissa(t, 10);
if (t->curr - prev <= 2) {
-token.kind = Token_Invalid;
+token->kind = Token_Invalid;
}
goto end;
case 'z': // Dozenal
advance_to_next_rune(t);
scan_mantissa(t, 12);
if (t->curr - prev <= 2) {
-token.kind = Token_Invalid;
+token->kind = Token_Invalid;
}
goto end;
case 'x': // Hexadecimal
advance_to_next_rune(t);
scan_mantissa(t, 16);
if (t->curr - prev <= 2) {
-token.kind = Token_Invalid;
+token->kind = Token_Invalid;
}
goto end;
case 'h': // Hexadecimal Float
-token.kind = Token_Float;
+token->kind = Token_Float;
advance_to_next_rune(t);
scan_mantissa(t, 16);
if (t->curr - prev <= 2) {
-token.kind = Token_Invalid;
+token->kind = Token_Invalid;
} else {
u8 *start = prev+2;
isize n = t->curr - start;
@@ -777,13 +787,13 @@ fraction:
}
advance_to_next_rune(t);
-token.kind = Token_Float;
+token->kind = Token_Float;
scan_mantissa(t, 10);
}
exponent:
if (t->curr_rune == 'e' || t->curr_rune == 'E') {
-token.kind = Token_Float;
+token->kind = Token_Float;
advance_to_next_rune(t);
if (t->curr_rune == '-' || t->curr_rune == '+') {
advance_to_next_rune(t);
@@ -793,14 +803,14 @@ exponent:
switch (t->curr_rune) {
case 'i': case 'j': case 'k':
-token.kind = Token_Imag;
+token->kind = Token_Imag;
advance_to_next_rune(t);
break;
}
end:
-    token.string.len = t->curr - token.string.text;
-    return token;
+    token->string.len = t->curr - token->string.text;
+    return;
}
@@ -870,59 +880,8 @@ bool scan_escape(Tokenizer *t) {
return true;
}
-gb_inline TokenKind token_kind_variant2(Tokenizer *t, TokenKind a, TokenKind b) {
-    if (t->curr_rune == '=') {
-        advance_to_next_rune(t);
-        return b;
-    }
-    return a;
-}
-gb_inline TokenKind token_kind_variant3(Tokenizer *t, TokenKind a, TokenKind b, Rune ch_c, TokenKind c) {
-    if (t->curr_rune == '=') {
-        advance_to_next_rune(t);
-        return b;
-    }
-    if (t->curr_rune == ch_c) {
-        advance_to_next_rune(t);
-        return c;
-    }
-    return a;
-}
-gb_inline TokenKind token_kind_variant4(Tokenizer *t, TokenKind a, TokenKind b, Rune ch_c, TokenKind c, Rune ch_d, TokenKind d) {
-    if (t->curr_rune == '=') {
-        advance_to_next_rune(t);
-        return b;
-    } else if (t->curr_rune == ch_c) {
-        advance_to_next_rune(t);
-        return c;
-    } else if (t->curr_rune == ch_d) {
-        advance_to_next_rune(t);
-        return d;
-    }
-    return a;
-}
-gb_inline TokenKind token_kind_dub_eq(Tokenizer *t, Rune sing_rune, TokenKind sing, TokenKind sing_eq, TokenKind dub, TokenKind dub_eq) {
-    if (t->curr_rune == '=') {
-        advance_to_next_rune(t);
-        return sing_eq;
-    } else if (t->curr_rune == sing_rune) {
-        advance_to_next_rune(t);
-        if (t->curr_rune == '=') {
-            advance_to_next_rune(t);
-            return dub_eq;
-        }
-        return dub;
-    }
-    return sing;
-}
-Token tokenizer_get_token(Tokenizer *t) {
+void tokenizer_get_token(Tokenizer *t, Token *token) {
// Skip whitespace
for (;;) {
switch (t->curr_rune) {
@@ -936,49 +895,49 @@ Token tokenizer_get_token(Tokenizer *t) {
break;
}
-    Token token = {};
-    token.string.text = t->curr;
-    token.string.len = 1;
-    token.pos.file.text = t->fullpath.text;
-    token.pos.file.len = t->fullpath.len;
-    token.pos.line = t->line_count;
-    token.pos.offset = t->curr - t->start;
-    token.pos.column = t->curr - t->line + 1;
+    token->kind = Token_Invalid;
+    token->string.text = t->curr;
+    token->string.len = 1;
+    token->pos.file.text = t->fullpath.text;
+    token->pos.file.len = t->fullpath.len;
+    token->pos.line = t->line_count;
+    token->pos.offset = t->curr - t->start;
+    token->pos.column = t->curr - t->line + 1;
Rune curr_rune = t->curr_rune;
if (rune_is_letter(curr_rune)) {
-    token.kind = Token_Ident;
+    token->kind = Token_Ident;
     while (rune_is_letter_or_digit(t->curr_rune)) {
         advance_to_next_rune(t);
     }
-    token.string.len = t->curr - token.string.text;
+    token->string.len = t->curr - token->string.text;
// NOTE(bill): All keywords are > 1
-    if (token.string.len > 1) {
-        u32 hash = keyword_hash(token.string.text, token.string.len);
+    // NOTE(bill): Heavily optimize to make it faster to find keywords
+    if (1 < token->string.len && token->string.len <= max_keyword_size && keyword_indices[token->string.len]) {
+        u32 hash = keyword_hash(token->string.text, token->string.len);
u32 index = hash & KEYWORD_HASH_TABLE_MASK;
KeywordHashEntry *entry = &keyword_hash_table[index];
-    if (entry->kind != Token_Invalid) {
+    if (entry->kind != Token_Invalid && entry->hash == hash) {
         String const &entry_text = token_strings[entry->kind];
-        if (str_eq(entry_text, token.string)) {
-            token.kind = entry->kind;
+        if (str_eq(entry_text, token->string)) {
+            token->kind = entry->kind;
}
}
}
} else if (gb_is_between(curr_rune, '0', '9')) {
-token = scan_number_to_token(t, false);
+scan_number_to_token(t, token, false);
} else {
advance_to_next_rune(t);
switch (curr_rune) {
case GB_RUNE_EOF:
-token.kind = Token_EOF;
+token->kind = Token_EOF;
break;
case '\'': // Rune Literal
{
-token.kind = Token_Rune;
+token->kind = Token_Rune;
Rune quote = curr_rune;
bool valid = true;
i32 n = 0, success;
@@ -1004,16 +963,16 @@ Token tokenizer_get_token(Tokenizer *t) {
if (valid && n != 1) {
tokenizer_err(t, "Invalid rune literal");
}
-    token.string.len = t->curr - token.string.text;
-    success = unquote_string(heap_allocator(), &token.string);
+    token->string.len = t->curr - token->string.text;
+    success = unquote_string(heap_allocator(), &token->string, 0);
     if (success > 0) {
         if (success == 2) {
-            array_add(&t->allocated_strings, token.string);
+            array_add(&t->allocated_strings, token->string);
         }
-        return token;
     } else {
         tokenizer_err(t, "Invalid rune literal");
     }
+    return;
} break;
case '`': // Raw String Literal
@@ -1022,7 +981,7 @@ Token tokenizer_get_token(Tokenizer *t) {
bool has_carriage_return = false;
i32 success;
Rune quote = curr_rune;
-token.kind = Token_String;
+token->kind = Token_String;
if (curr_rune == '"') {
for (;;) {
Rune r = t->curr_rune;
@@ -1054,82 +1013,118 @@ Token tokenizer_get_token(Tokenizer *t) {
}
}
}
-    token.string.len = t->curr - token.string.text;
-    success = unquote_string(heap_allocator(), &token.string, 0, has_carriage_return);
+    token->string.len = t->curr - token->string.text;
+    success = unquote_string(heap_allocator(), &token->string, 0, has_carriage_return);
     if (success > 0) {
         if (success == 2) {
-            array_add(&t->allocated_strings, token.string);
+            array_add(&t->allocated_strings, token->string);
         }
-        return token;
     } else {
         tokenizer_err(t, "Invalid string literal");
     }
+    return;
} break;
case '.':
if (t->curr_rune == '.') {
advance_to_next_rune(t);
-        token.kind = Token_Ellipsis;
+        token->kind = Token_Ellipsis;
         if (t->curr_rune == '<') {
             advance_to_next_rune(t);
-            token.kind = Token_RangeHalf;
+            token->kind = Token_RangeHalf;
         }
     } else if ('0' <= t->curr_rune && t->curr_rune <= '9') {
-        token = scan_number_to_token(t, true);
+        scan_number_to_token(t, token, true);
     } else {
-        token.kind = Token_Period;
+        token->kind = Token_Period;
}
break;
-case '@': token.kind = Token_At; break;
-case '$': token.kind = Token_Dollar; break;
-case '?': token.kind = Token_Question; break;
-case '^': token.kind = Token_Pointer; break;
-case ';': token.kind = Token_Semicolon; break;
-case ',': token.kind = Token_Comma; break;
-case ':': token.kind = Token_Colon; break;
-case '(': token.kind = Token_OpenParen; break;
-case ')': token.kind = Token_CloseParen; break;
-case '[': token.kind = Token_OpenBracket; break;
-case ']': token.kind = Token_CloseBracket; break;
-case '{': token.kind = Token_OpenBrace; break;
-case '}': token.kind = Token_CloseBrace; break;
-case '\\': token.kind = Token_BackSlash; break;
+case '@': token->kind = Token_At; break;
+case '$': token->kind = Token_Dollar; break;
+case '?': token->kind = Token_Question; break;
+case '^': token->kind = Token_Pointer; break;
+case ';': token->kind = Token_Semicolon; break;
+case ',': token->kind = Token_Comma; break;
+case ':': token->kind = Token_Colon; break;
+case '(': token->kind = Token_OpenParen; break;
+case ')': token->kind = Token_CloseParen; break;
+case '[': token->kind = Token_OpenBracket; break;
+case ']': token->kind = Token_CloseBracket; break;
+case '{': token->kind = Token_OpenBrace; break;
+case '}': token->kind = Token_CloseBrace; break;
+case '\\': token->kind = Token_BackSlash; break;
-// case 0x2260: token.kind = Token_NotEq; break; // '≠'
-// case 0x2264: token.kind = Token_LtEq; break; // '≤'
-// case 0x2265: token.kind = Token_GtEq; break; // '≥'
-// case 0x2208: token.kind = Token_in; break; // '∈'
-// case 0x2209: token.kind = Token_not_in; break; // '∉'
+// case 0x2260: token->kind = Token_NotEq; break; // '≠'
+// case 0x2264: token->kind = Token_LtEq; break; // '≤'
+// case 0x2265: token->kind = Token_GtEq; break; // '≥'
+// case 0x2208: token->kind = Token_in; break; // '∈'
+// case 0x2209: token->kind = Token_not_in; break; // '∉'
-case '%': token.kind = token_kind_dub_eq(t, '%', Token_Mod, Token_ModEq, Token_ModMod, Token_ModModEq); break;
-case '*': token.kind = token_kind_variant2(t, Token_Mul, Token_MulEq); break;
-case '=':
-    token.kind = Token_Eq;
-    if (t->curr_rune == '>') {
-        advance_to_next_rune(t);
-        token.kind = Token_DoubleArrowRight;
-    } else if (t->curr_rune == '=') {
-        advance_to_next_rune(t);
-        token.kind = Token_CmpEq;
-    }
-    break;
-case '~': token.kind = token_kind_variant2(t, Token_Xor, Token_XorEq); break;
-case '!': token.kind = token_kind_variant2(t, Token_Not, Token_NotEq); break;
-case '+': token.kind = token_kind_variant2(t, Token_Add, Token_AddEq); break;
-case '-':
-    token.kind = Token_Sub;
-    if (t->curr_rune == '=') {
-        advance_to_next_rune(t);
-        token.kind = Token_SubEq;
-    } else if (t->curr_rune == '-' && peek_byte(t) == '-') {
-        advance_to_next_rune(t);
-        advance_to_next_rune(t);
-        token.kind = Token_Undef;
-    } else if (t->curr_rune == '>') {
-        advance_to_next_rune(t);
-        token.kind = Token_ArrowRight;
-    }
-    break;
+case '%':
+    token->kind = Token_Mod;
+    if (t->curr_rune == '=') {
+        token->kind = Token_ModEq;
+        advance_to_next_rune(t);
+    } else if (t->curr_rune == '%') {
+        token->kind = Token_ModMod;
+        advance_to_next_rune(t);
+        if (t->curr_rune == '=') {
+            token->kind = Token_ModModEq;
+            advance_to_next_rune(t);
+        }
+    }
+    break;
+case '*':
+    token->kind = Token_Mul;
+    if (t->curr_rune == '=') {
+        advance_to_next_rune(t);
+        token->kind = Token_MulEq;
+    }
+    break;
+case '=':
+    token->kind = Token_Eq;
+    if (t->curr_rune == '>') {
+        advance_to_next_rune(t);
+        token->kind = Token_DoubleArrowRight;
+    } else if (t->curr_rune == '=') {
+        advance_to_next_rune(t);
+        token->kind = Token_CmpEq;
+    }
+    break;
+case '~':
+    token->kind = Token_Xor;
+    if (t->curr_rune == '=') {
+        advance_to_next_rune(t);
+        token->kind = Token_XorEq;
+    }
+    break;
+case '!':
+    token->kind = Token_Not;
+    if (t->curr_rune == '=') {
+        advance_to_next_rune(t);
+        token->kind = Token_NotEq;
+    }
+    break;
+case '+':
+    token->kind = Token_Add;
+    if (t->curr_rune == '=') {
+        advance_to_next_rune(t);
+        token->kind = Token_AddEq;
+    }
+    break;
+case '-':
+    token->kind = Token_Sub;
+    if (t->curr_rune == '=') {
+        advance_to_next_rune(t);
+        token->kind = Token_SubEq;
+    } else if (t->curr_rune == '-' && peek_byte(t) == '-') {
+        advance_to_next_rune(t);
+        advance_to_next_rune(t);
+        token->kind = Token_Undef;
+    } else if (t->curr_rune == '>') {
+        advance_to_next_rune(t);
+        token->kind = Token_ArrowRight;
+    }
+    break;
@@ -1138,20 +1133,24 @@ Token tokenizer_get_token(Tokenizer *t) {
while (t->curr_rune != '\n' && t->curr_rune != GB_RUNE_EOF) {
advance_to_next_rune(t);
}
-    token.kind = Token_Comment;
+    token->kind = Token_Comment;
 } else {
-    token.kind = Token_Hash;
+    token->kind = Token_Hash;
}
break;
case '/': {
+    token->kind = Token_Quo;
     if (t->curr_rune == '/') {
+        token->kind = Token_Comment;
         while (t->curr_rune != '\n' && t->curr_rune != GB_RUNE_EOF) {
             advance_to_next_rune(t);
         }
-        token.kind = Token_Comment;
     } else if (t->curr_rune == '*') {
+        token->kind = Token_Comment;
isize comment_scope = 1;
advance_to_next_rune(t);
while (comment_scope > 0) {
@@ -1173,37 +1172,81 @@ Token tokenizer_get_token(Tokenizer *t) {
advance_to_next_rune(t);
}
}
-        token.kind = Token_Comment;
-    } else {
-        token.kind = token_kind_variant2(t, Token_Quo, Token_QuoEq);
+    } else if (t->curr_rune == '=') {
+        advance_to_next_rune(t);
+        token->kind = Token_QuoEq;
     }
} break;
-case '<':
-    if (t->curr_rune == '-') {
-        advance_to_next_rune(t);
-        token.kind = Token_ArrowLeft;
-    } else {
-        token.kind = token_kind_dub_eq(t, '<', Token_Lt, Token_LtEq, Token_Shl, Token_ShlEq);
-    }
-    break;
-case '>': token.kind = token_kind_dub_eq(t, '>', Token_Gt, Token_GtEq, Token_Shr, Token_ShrEq); break;
-case '&':
-    token.kind = Token_And;
-    if (t->curr_rune == '~') {
-        token.kind = Token_AndNot;
-        advance_to_next_rune(t);
-        if (t->curr_rune == '=') {
-            token.kind = Token_AndNotEq;
-            advance_to_next_rune(t);
-        }
-    } else {
-        token.kind = token_kind_dub_eq(t, '&', Token_And, Token_AndEq, Token_CmpAnd, Token_CmpAndEq);
-    }
-    break;
-case '|': token.kind = token_kind_dub_eq(t, '|', Token_Or, Token_OrEq, Token_CmpOr, Token_CmpOrEq); break;
+case '<':
+    token->kind = Token_Lt;
+    if (t->curr_rune == '-') {
+        advance_to_next_rune(t);
+        token->kind = Token_ArrowLeft;
+    } else if (t->curr_rune == '=') {
+        token->kind = Token_LtEq;
+        advance_to_next_rune(t);
+    } else if (t->curr_rune == '<') {
+        token->kind = Token_Shl;
+        advance_to_next_rune(t);
+        if (t->curr_rune == '=') {
+            token->kind = Token_ShlEq;
+            advance_to_next_rune(t);
+        }
+    }
+    break;
+case '>':
+    token->kind = Token_Gt;
+    if (t->curr_rune == '=') {
+        token->kind = Token_GtEq;
+        advance_to_next_rune(t);
+    } else if (t->curr_rune == '>') {
+        token->kind = Token_Shr;
+        advance_to_next_rune(t);
+        if (t->curr_rune == '=') {
+            token->kind = Token_ShrEq;
+            advance_to_next_rune(t);
+        }
+    }
+    break;
+case '&':
+    token->kind = Token_And;
+    if (t->curr_rune == '~') {
+        token->kind = Token_AndNot;
+        advance_to_next_rune(t);
+        if (t->curr_rune == '=') {
+            token->kind = Token_AndNotEq;
+            advance_to_next_rune(t);
+        }
+    } else if (t->curr_rune == '=') {
+        token->kind = Token_AndEq;
+        advance_to_next_rune(t);
+    } else if (t->curr_rune == '&') {
+        token->kind = Token_CmpAnd;
+        advance_to_next_rune(t);
+        if (t->curr_rune == '=') {
+            token->kind = Token_CmpAndEq;
+            advance_to_next_rune(t);
+        }
+    }
+    break;
+case '|':
+    token->kind = Token_Or;
+    if (t->curr_rune == '=') {
+        token->kind = Token_OrEq;
+        advance_to_next_rune(t);
+    } else if (t->curr_rune == '|') {
+        token->kind = Token_CmpOr;
+        advance_to_next_rune(t);
+        if (t->curr_rune == '=') {
+            token->kind = Token_CmpOrEq;
+            advance_to_next_rune(t);
+        }
+    }
+    break;
default:
if (curr_rune != GB_RUNE_BOM) {
@@ -1211,11 +1254,11 @@ Token tokenizer_get_token(Tokenizer *t) {
int len = cast(int)gb_utf8_encode_rune(str, curr_rune);
tokenizer_err(t, "Illegal character: %.*s (%d) ", len, str, curr_rune);
}
-token.kind = Token_Invalid;
+token->kind = Token_Invalid;
break;
}
}
-    token.string.len = t->curr - token.string.text;
-    return token;
+    token->string.len = t->curr - token->string.text;
+    return;
}
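
The identifier fast path above stacks three cheap rejections before any string comparison: a length window (min/max keyword size plus the keyword_indices table of valid lengths), then a single hash-table probe, then an entry->hash == hash check so str_eq only runs on a near-certain match. A self-contained sketch of the same technique with simplified types (standalone C++ for illustration, not the compiler's actual String/TokenKind machinery):

    #include <cstdint>
    #include <cstring>

    struct Entry { uint32_t hash; int kind; const char *text; };
    static Entry table[64];  // analogue of keyword_hash_table (power-of-two size)
    static bool  len_ok[16]; // analogue of keyword_indices: which lengths are keywords

    static uint32_t fnv32a(const char *s, size_t n) {
        uint32_t h = 0x811c9dc5;                  // FNV-1a offset basis
        for (size_t i = 0; i < n; i++) {
            h = (h ^ (uint8_t)s[i]) * 0x01000193; // FNV prime
        }
        return h;
    }

    int keyword_kind(const char *s, size_t n, int ident_kind) {
        // 1. length filter: rejects most identifiers without hashing
        if (n < 2 || n >= 16 || !len_ok[n]) return ident_kind;
        // 2. hash probe: a single slot, no chaining
        uint32_t h = fnv32a(s, n);
        Entry *e = &table[h & 63];
        // 3. compare the stored hash first; memcmp only on a likely hit
        if (e->text && e->hash == h && strlen(e->text) == n && memcmp(e->text, s, n) == 0) {
            return e->kind;
        }
        return ident_kind;
    }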

View File

@@ -770,7 +770,7 @@ void set_base_type(Type *t, Type *base) {
Type *alloc_type(TypeKind kind) {
gbAllocator a = heap_allocator();
Type *t = gb_alloc_item(a, Type);
-gb_zero_item(t);
+zero_item(t);
t->kind = kind;
t->cached_size = -1;
t->cached_align = -1;