mirror of
https://github.com/nim-lang/Nim.git
synced 2025-12-28 08:54:53 +00:00
fixes #19846
This commit is contained in:
@@ -1037,6 +1037,19 @@ proc split*(s: openArray[char], sep: Rune, maxsplit: int = -1): seq[string] {.no
|
||||
## that returns a sequence of substrings.
|
||||
accResult(split(s, sep, maxsplit))
|
||||
|
||||
func getRuneHeadIdx(s: openArray[char], idx: int): int =
  ## Returns the index of the first (head) byte of the UTF-8 encoded Rune
  ## that the byte `s[idx]` belongs to, so that `s[result]` starts that Rune.
  ##
  ## `idx` may point at any byte of the Rune:
  ## * an ASCII byte (`0b0xxx_xxxx`) or a head byte (`0b11xx_xxxx`) already
  ##   starts a Rune, so `idx` itself is returned;
  ## * for a continuation byte (`0b10xx_xxxx`) the search walks back over at
  ##   most three bytes, because a UTF-8 sequence is at most four bytes long.
  ##
  ## The search never leaves `s`. If no head byte is found (malformed input,
  ## e.g. a stray continuation byte), `idx` itself is returned so the caller
  ## can treat that byte on its own.
  ##
  ## `idx` must be a valid index of `s`.
  result = idx
  # Only continuation bytes ('\x80'..'\xBF') need a backward search.
  if s[idx] notin {'\x80'..'\xBF'}:
    return
  for back in 1..3:
    let pos = idx - back
    if pos < 0 or s[pos] <= '\x7F':
      # Ran off the start of `s` or hit an ASCII byte before any head byte:
      # `s[idx]` is a stray continuation byte.
      break
    if s[pos] >= '\xC0':
      # 0b110..., 0b1110... or 0b11110...: the head byte of the sequence.
      return pos
  # Falling through leaves `result == idx` (no head byte found).
|
||||
|
||||
proc strip*(s: openArray[char], leading = true, trailing = true,
|
||||
runes: openArray[Rune] = unicodeSpaces): string {.noSideEffect,
|
||||
rtl, extern: "nucStrip".} =
|
||||
@@ -1073,18 +1086,9 @@ proc strip*(s: openArray[char], leading = true, trailing = true,
|
||||
xI: int
|
||||
rune: Rune
|
||||
while i >= 0:
|
||||
i = getRuneHeadIdx(s, i)
|
||||
xI = i
|
||||
fastRuneAt(s, xI, rune)
|
||||
var yI = i - 1
|
||||
while yI >= 0:
|
||||
var
|
||||
yIend = yI
|
||||
pRune: Rune
|
||||
fastRuneAt(s, yIend, pRune)
|
||||
if yIend < xI: break
|
||||
i = yI
|
||||
rune = pRune
|
||||
dec(yI)
|
||||
if not runes.contains(rune):
|
||||
eI = xI - 1
|
||||
break
|
||||
|
||||
@@ -194,6 +194,23 @@ block stripTests:
|
||||
doAssert(strip("×text×", leading = false, runes = ["×".asRune]) == "×text")
|
||||
doAssert(strip("×text×", trailing = false, runes = ["×".asRune]) == "text×")
|
||||
|
||||
doAssert(strip("\u2000") == "")
|
||||
doAssert(strip("a\u2000") == "a")
|
||||
|
||||
# bug #19846
|
||||
block:
|
||||
# check against unicode whose utf8 byteLen > 2
|
||||
doAssert(strip("‟„”“‛‚’‘‗•STR•‗‘’‚‛“”„‟", runes = "•‗‘’‚‛“”„‟".toRunes) == "STR")
|
||||
let chi = "abc\u8377\u9020"
|
||||
doAssert(strip(chi, leading = false, runes = ["\u9020".asRune]) == "abc\u8377")
|
||||
doAssert(strip(chi) == chi) # the last byte of chi is \xa0 (U+00A0, no-break space), which is in unicodeSpaces
|
||||
|
||||
let
|
||||
grinning_face = "\u{1f600}"
|
||||
thinking_face = "\u{1f914}"
|
||||
doAssert(strip(grinning_face & thinking_face & thinking_face,
|
||||
runes = thinking_face.toRunes) == grinning_face)
|
||||
|
||||
block repeatTests:
|
||||
doAssert repeat('c'.Rune, 5) == "ccccc"
|
||||
doAssert repeat("×".asRune, 5) == "×××××"
|
||||
|
||||
Reference in New Issue
Block a user