fixes #19846; std/unicode.strip trailing big chars (#25274)

fixes #19846
This commit is contained in:
lit
2025-11-11 19:01:07 +08:00
committed by GitHub
parent cc4c7377b2
commit 2679b3221c
2 changed files with 31 additions and 10 deletions

View File

@@ -1037,6 +1037,19 @@ proc split*(s: openArray[char], sep: Rune, maxsplit: int = -1): seq[string] {.no
## that returns a sequence of substrings.
accResult(split(s, sep, maxsplit))
func getRuneHeadIdx(s: openArray[char], idx: int): int =
  ## Returns the index of the first (lead) byte of the UTF-8 encoded Rune
  ## that the byte `s[idx]` belongs to, i.e. `s[result]` starts that Rune.
  ##
  ## `idx` must be a valid index into `s`. At most three bytes before `idx`
  ## are inspected and the search never reads before `s[0]`.
  ##
  ## If `s[idx]` already starts a Rune (ASCII or lead byte), or no lead byte
  ## can be found behind it (malformed UTF-8), `idx` itself is returned so
  ## that the byte is handled as a Rune of its own instead of indexing
  ## out of bounds.
  result = idx
  if s[idx] <= '\x7F' or s[idx] >= '\xC0':
    # 0b0xxx_xxxx (ASCII) or 0b11xx_xxxx (lead byte): already a Rune head.
    return
  # 0b10xx_xxxx: continuation byte, so the lead byte is 1..3 bytes back.
  # `min(3, idx)` keeps the scan inside the array when `idx` is near 0.
  for back in 1 .. min(3, idx):
    let c = s[idx - back]
    if c >= '\xC0':
      # 0b110..., 0b1110... or 0b11110...: found the lead byte.
      return idx - back
    if c <= '\x7F':
      # An ASCII byte cannot be part of a multi-byte Rune, so `s[idx]` is a
      # stray continuation byte; fall through and return `idx`.
      break
proc strip*(s: openArray[char], leading = true, trailing = true,
runes: openArray[Rune] = unicodeSpaces): string {.noSideEffect,
rtl, extern: "nucStrip".} =
@@ -1073,18 +1086,9 @@ proc strip*(s: openArray[char], leading = true, trailing = true,
xI: int
rune: Rune
while i >= 0:
i = getRuneHeadIdx(s, i)
xI = i
fastRuneAt(s, xI, rune)
var yI = i - 1
while yI >= 0:
var
yIend = yI
pRune: Rune
fastRuneAt(s, yIend, pRune)
if yIend < xI: break
i = yI
rune = pRune
dec(yI)
if not runes.contains(rune):
eI = xI - 1
break

View File

@@ -194,6 +194,23 @@ block stripTests:
doAssert(strip("×text×", leading = false, runes = ["×".asRune]) == "×text")
doAssert(strip("×text×", trailing = false, runes = ["×".asRune]) == "text×")
doAssert(strip("\u2000") == "")
doAssert(strip("a\u2000") == "a")
# bug #19846
block:
# Regression checks for `strip` on trailing Runes whose UTF-8 encoding is
# longer than two bytes.
# check against unicode whose utf8 byteLen > 2
doAssert(strip("‟„”“‗•STR•‗“”„‟", runes = "•‗‘’‚‛“”„‟".toRunes) == "STR")
# "\u8377" and "\u9020" are each encoded as three bytes in UTF-8.
let chi = "abc\u8377\u9020"
doAssert(strip(chi, leading = false, runes = ["\u9020".asRune]) == "abc\u8377")
# "\u9020" is encoded as E9 80 A0, so the last byte of `chi` is \xa0, and the
# code point U+00A0 is in unicodeSpaces; that byte alone must not be taken
# for a trailing space, so nothing is stripped here.
doAssert(strip(chi) == chi)
# Four-byte UTF-8 Runes (code points above U+FFFF).
let
grinning_face = "\u{1f600}"
thinking_face = "\u{1f914}"
doAssert(strip(grinning_face & thinking_face & thinking_face,
runes = thinking_face.toRunes) == grinning_face)
block repeatTests:
doAssert repeat('c'.Rune, 5) == "ccccc"
doAssert repeat("×".asRune, 5) == "×××××"