From 349039678a70a0397a44b04bb2445f99d24b8d86 Mon Sep 17 00:00:00 2001 From: Kaushal Modi Date: Wed, 11 Mar 2020 19:41:45 -0400 Subject: [PATCH] unicode.split: Fix the splitting when a Rune separator is used [backport] (#13629) * unicode.split: Fix the splitting when a Rune separator is used [backport] - Fixes https://github.com/nim-lang/Nim/issues/13628 - Ref https://irclogs.nim-lang.org/11-03-2020.html#20:01:34 * unicode.split: Remove the sepLen based logic.. resulted in wrong jumps (cherry picked from commit 64995db4fdc8d39a9d9c7a1bfb5d8e1dd8c0f902) --- lib/pure/unicode.nim | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/lib/pure/unicode.nim b/lib/pure/unicode.nim index ad754a07f2..a775d5f52d 100644 --- a/lib/pure/unicode.nim +++ b/lib/pure/unicode.nim @@ -933,27 +933,23 @@ proc stringHasSep(s: string, index: int, sep: Rune): bool = fastRuneAt(s, index, rune, false) return sep == rune -template splitCommon(s, sep, maxsplit: untyped, sepLen: int = -1) = +template splitCommon(s, sep, maxsplit: untyped) = ## Common code for split procedures. + let + sLen = len(s) var last = 0 splits = maxsplit - if len(s) > 0: - while last <= len(s): + if sLen > 0: + while last <= sLen: var first = last - while last < len(s) and not stringHasSep(s, last, sep): - when sep is Rune: - inc(last, sepLen) - else: - inc(last, runeLenAt(s, last)) - if splits == 0: last = len(s) + while last < sLen and not stringHasSep(s, last, sep): + inc(last, runeLenAt(s, last)) + if splits == 0: last = sLen yield s[first .. (last - 1)] if splits == 0: break dec(splits) - when sep is Rune: - inc(last, sepLen) - else: - inc(last, if last < len(s): runeLenAt(s, last) else: 1) + inc(last, if last < sLen: runeLenAt(s, last) else: 1) iterator split*(s: string, seps: openArray[Rune] = unicodeSpaces, maxsplit: int = -1): string = @@ -1037,7 +1033,7 @@ iterator split*(s: string, sep: Rune, maxsplit: int = -1): string = ## "" ## "" ## - splitCommon(s, sep, maxsplit, sep.size) + splitCommon(s, sep, maxsplit) proc split*(s: string, seps: openArray[Rune] = unicodeSpaces, maxsplit: int = -1): seq[string] {.noSideEffect, rtl, extern: "nucSplitRunes".} = @@ -1424,6 +1420,7 @@ when isMainModule: "an", "example", "", ""] doAssert s.split(maxsplit = 4) == @["", "this", "is", "an", "example "] doAssert s.split(' '.Rune, maxsplit = 1) == @["", "this is an example "] + doAssert s3.split("×".runeAt(0)) == @[":this", "is", "an:example", "", ""] block stripTests: doAssert(strip("") == "")