diff --git a/core/encoding/xml/tokenizer.odin b/core/encoding/xml/tokenizer.odin index e453552b8..9247d2531 100644 --- a/core/encoding/xml/tokenizer.odin +++ b/core/encoding/xml/tokenizer.odin @@ -110,44 +110,51 @@ error :: proc(t: ^Tokenizer, offset: int, msg: string, args: ..any) { t.error_count += 1 } +@(optimization_mode="speed") advance_rune :: proc(using t: ^Tokenizer) { - if read_offset < len(src) { - offset = read_offset - if ch == '\n' { - line_offset = offset - line_count += 1 - } - r, w := rune(src[read_offset]), 1 - switch { - case r == 0: - error(t, t.offset, "illegal character NUL") - case r >= utf8.RUNE_SELF: - r, w = utf8.decode_rune_in_string(src[read_offset:]) - if r == utf8.RUNE_ERROR && w == 1 { - error(t, t.offset, "illegal UTF-8 encoding") - } else if r == utf8.RUNE_BOM && offset > 0 { - error(t, t.offset, "illegal byte order mark") + #no_bounds_check { + /* + Already bounds-checked here. + */ + if read_offset < len(src) { + offset = read_offset + if ch == '\n' { + line_offset = offset + line_count += 1 } + r, w := rune(src[read_offset]), 1 + switch { + case r == 0: + error(t, t.offset, "illegal character NUL") + case r >= utf8.RUNE_SELF: + r, w = #force_inline utf8.decode_rune_in_string(src[read_offset:]) + if r == utf8.RUNE_ERROR && w == 1 { + error(t, t.offset, "illegal UTF-8 encoding") + } else if r == utf8.RUNE_BOM && offset > 0 { + error(t, t.offset, "illegal byte order mark") + } + } + read_offset += w + ch = r + } else { + offset = len(src) + if ch == '\n' { + line_offset = offset + line_count += 1 + } + ch = -1 } - read_offset += w - ch = r - } else { - offset = len(src) - if ch == '\n' { - line_offset = offset - line_count += 1 - } - ch = -1 } } peek_byte :: proc(t: ^Tokenizer, offset := 0) -> byte { if t.read_offset+offset < len(t.src) { - return t.src[t.read_offset+offset] + #no_bounds_check return t.src[t.read_offset+offset] } return 0 } +@(optimization_mode="speed") skip_whitespace :: proc(t: ^Tokenizer) { for { switch t.ch { @@ -159,6 +166,7 @@ skip_whitespace :: proc(t: ^Tokenizer) { } } +@(optimization_mode="speed") is_letter :: proc(r: rune) -> bool { if r < utf8.RUNE_SELF { switch r { @@ -177,6 +185,7 @@ is_valid_identifier_rune :: proc(r: rune) -> bool { case '_', '-', ':': return true case 'A'..='Z', 'a'..='z': return true case '0'..'9': return true + case -1: return false } } @@ -205,6 +214,7 @@ scan_identifier :: proc(t: ^Tokenizer) -> string { return string(t.src[offset : t.offset]) } +@(optimization_mode="speed") scan_string :: proc(t: ^Tokenizer, offset: int, close: rune = '<', consume_close := false, multiline := true) -> (value: string, err: Error) { err = .None in_cdata := false diff --git a/core/encoding/xml/xml_reader.odin b/core/encoding/xml/xml_reader.odin index b2226e6b9..35dd83b3f 100644 --- a/core/encoding/xml/xml_reader.odin +++ b/core/encoding/xml/xml_reader.odin @@ -239,7 +239,7 @@ parse_from_slice :: proc(data: []u8, options := DEFAULT_Options, path := "", err /* Consume peeked `<` */ - tok := scan(t) + advance_rune(t) open := scan(t) #partial switch open.kind { @@ -265,7 +265,7 @@ parse_from_slice :: proc(data: []u8, options := DEFAULT_Options, path := "", err skip_element(t) or_return } case: - error(t, t.offset, "Expected \"