From 6f1222e9bfa76fdb45668465d62c2a454c07eca8 Mon Sep 17 00:00:00 2001 From: Jeroen van Rijn Date: Thu, 16 Jun 2022 16:12:15 +0200 Subject: [PATCH] Update `strings.prefix_length` to handle partial UTF-8 runes. --- core/strings/strings.odin | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/core/strings/strings.odin b/core/strings/strings.odin index 678cc94cd..6bdafbba4 100644 --- a/core/strings/strings.odin +++ b/core/strings/strings.odin @@ -225,14 +225,23 @@ equal_fold :: proc(u, v: string) -> bool { */ prefix_length :: proc(a, b: string) -> (n: int) { _len := min(len(a), len(b)) - idx := 0 - #no_bounds_check for idx < _len && a[idx] == b[idx] { - idx += 1 + // Scan for matches including partial codepoints. + #no_bounds_check for n < _len && a[n] == b[n] { + n += 1 + } - if a[idx] & 128 != 128 { - // new codepoint or end of multi-byte codepoint, update match length - n = idx + // Now scan to ignore partial codepoints. + if n > 0 { + s := a[:n] + n = 0 + for { + r0, w := utf8.decode_rune(s[n:]) + if r0 != utf8.RUNE_ERROR { + n += w + } else { + break + } } } return