From a34b03acedb1eaf34b963c5d0ad2b8cc68cccf92 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Tue, 9 Jun 2026 15:32:17 +0100 Subject: [PATCH] Minor optimization help for `advance_to_next_rune` --- src/tokenizer.cpp | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp index 000e5193a..936fddaf7 100644 --- a/src/tokenizer.cpp +++ b/src/tokenizer.cpp @@ -364,27 +364,30 @@ gb_internal void advance_to_next_rune(Tokenizer *t) { t->column_minus_one = -1; t->line_count++; } - if (t->read_curr < t->end) { - t->curr = t->read_curr; - Rune rune = *t->read_curr; - if (rune == 0) { - tokenizer_err(t, "Illegal character NUL"); - t->read_curr++; - } else if (rune & 0x80) { // not ASCII - isize width = utf8_decode(t->read_curr, t->end-t->read_curr, &rune); - t->read_curr += width; + + u8 *read_curr = t->read_curr; + u8 *end = t->end; + if (read_curr < end) { + Rune rune = *read_curr; + t->curr = read_curr; + if (cast(u32)(rune - 1) < 0x7f) { // 0x01..0x7F: ordinary ASCII (hot path) + t->read_curr = read_curr + 1; + } else if (rune & 0x80) { // multi-byte UTF-8 + isize width = utf8_decode(read_curr, end - read_curr, &rune); + t->read_curr = read_curr + width; if (rune == GB_RUNE_INVALID && width == 1) { tokenizer_err(t, "Illegal UTF-8 encoding"); - } else if (rune == GB_RUNE_BOM && t->curr-t->start > 0){ + } else if (rune == GB_RUNE_BOM && read_curr != t->start) { tokenizer_err(t, "Illegal byte order mark"); } - } else { - t->read_curr++; + } else { // rune == 0 (NUL) + t->read_curr = read_curr + 1; + tokenizer_err(t, "Illegal character NUL"); } t->curr_rune = rune; t->column_minus_one++; } else { - t->curr = t->end; + t->curr = end; t->curr_rune = GB_RUNE_EOF; } }