fixes #19846; std/unicode.strip trailing big chars (#25274)

fixes #19846
This commit is contained in:
lit
2025-11-11 19:01:07 +08:00
committed by GitHub
parent cc4c7377b2
commit 2679b3221c
2 changed files with 31 additions and 10 deletions

View File

@@ -1037,6 +1037,19 @@ proc split*(s: openArray[char], sep: Rune, maxsplit: int = -1): seq[string] {.no
## that returns a sequence of substrings.
accResult(split(s, sep, maxsplit))
func getRuneHeadIdx(s: openArray[char], idx: int): int =
  ## Returns the index of the first (head) byte of the UTF-8 encoded Rune
  ## that the byte `s[idx]` belongs to.
  ##
  ## `idx` must be a valid index into `s`. It may point at any byte of the
  ## Rune, including the head byte itself, in which case `idx` is returned.
  ##
  ## If no head byte is found within the 3 bytes preceding `idx` (malformed
  ## UTF-8, e.g. a stray continuation byte), `idx` is returned so that the
  ## byte is treated as a unit of its own; `s` is never indexed below 0.
  result = idx
  # A Rune has at most 3 continuation bytes (0b10xx_xxxx) after its head,
  # so step back at most 3 times, and only while on a continuation byte.
  for _ in 0..2:
    if result == 0 or s[result] < '\x80' or s[result] >= '\xC0':
      break
    dec result
  if s[result] < '\xC0':
    # Not a multi-byte head (0b11xx_xxxx): either `s[idx]` is ASCII or the
    # sequence is malformed. Both cases resolve to `idx`.
    result = idx
proc strip*(s: openArray[char], leading = true, trailing = true,
runes: openArray[Rune] = unicodeSpaces): string {.noSideEffect,
rtl, extern: "nucStrip".} =
@@ -1073,18 +1086,9 @@ proc strip*(s: openArray[char], leading = true, trailing = true,
xI: int
rune: Rune
while i >= 0:
i = getRuneHeadIdx(s, i)
xI = i
fastRuneAt(s, xI, rune)
var yI = i - 1
while yI >= 0:
var
yIend = yI
pRune: Rune
fastRuneAt(s, yIend, pRune)
if yIend < xI: break
i = yI
rune = pRune
dec(yI)
if not runes.contains(rune):
eI = xI - 1
break