mirror of
https://github.com/nim-lang/Nim.git
synced 2026-04-19 05:50:30 +00:00
strutils.split/rsplit now return src on an empty sep (#22136)
This is a rebase of an earlier rejected PR. Following the discussion
around it, this commit provides a valid output for an edge case
of an empty separator for `split` and `rsplit` routines. The empty
separator is interpreted as "split by no separators" and the initial
string is returned. This is consistent with the behaviour of the
`set[char]` version of `split`/`rsplit` routines and unifies them all.
Compared to a commit merged earlier, this one has the benefit of
not relying on assertions, which are removed in release builds
and thus would still not prevent possible infinite loops (which was the
earlier behaviour for this edge case for a separator of type `string`).
Co-authored-by: Andreas Rumpf <rumpf_a@web.de>
(cherry picked from commit 5e529b3bfa)
This commit is contained in:
@@ -346,11 +346,14 @@ func cmpIgnoreStyle*(a, b: string): int {.rtl, extern: "nsuCmpIgnoreStyle".} =
|
||||
# --------- Private templates for different split separators -----------
|
||||
|
||||
func substrEq(s: string, pos: int, substr: string): bool =
|
||||
var i = 0
|
||||
# Always returns false for empty `substr`
|
||||
var length = substr.len
|
||||
while i < length and pos+i < s.len and s[pos+i] == substr[i]:
|
||||
inc i
|
||||
return i == length
|
||||
if length > 0:
|
||||
var i = 0
|
||||
while i < length and pos+i < s.len and s[pos+i] == substr[i]:
|
||||
inc i
|
||||
i == length
|
||||
else: false
|
||||
|
||||
template stringHasSep(s: string, index: int, seps: set[char]): bool =
|
||||
s[index] in seps
|
||||
@@ -492,12 +495,17 @@ iterator split*(s: string, sep: string, maxsplit: int = -1): string =
|
||||
## "is"
|
||||
## "corrupted"
|
||||
##
|
||||
## .. note:: Empty separator string results in returning an original string,
|
||||
## following the interpretation "split by no element".
|
||||
##
|
||||
## See also:
|
||||
## * `rsplit iterator<#rsplit.i,string,string,int,bool>`_
|
||||
## * `splitLines iterator<#splitLines.i,string>`_
|
||||
## * `splitWhitespace iterator<#splitWhitespace.i,string,int>`_
|
||||
## * `split func<#split,string,string,int>`_
|
||||
splitCommon(s, sep, maxsplit, sep.len)
|
||||
let sepLen = if sep.len == 0: 1 # prevents infinite loop
|
||||
else: sep.len
|
||||
splitCommon(s, sep, maxsplit, sepLen)
|
||||
|
||||
|
||||
template rsplitCommon(s, sep, maxsplit, sepLen) =
|
||||
@@ -567,6 +575,9 @@ iterator rsplit*(s: string, seps: set[char] = Whitespace,
|
||||
##
|
||||
## Substrings are separated from the right by the set of chars `seps`
|
||||
##
|
||||
## .. note:: Empty separator set results in returning an original string,
|
||||
## following the interpretation "split by no element".
|
||||
##
|
||||
## See also:
|
||||
## * `split iterator<#split.i,string,set[char],int>`_
|
||||
## * `splitLines iterator<#splitLines.i,string>`_
|
||||
@@ -592,12 +603,17 @@ iterator rsplit*(s: string, sep: string, maxsplit: int = -1,
|
||||
##
|
||||
## Substrings are separated from the right by the string `sep`
|
||||
##
|
||||
## .. note:: Empty separator string results in returning an original string,
|
||||
## following the interpretation "split by no element".
|
||||
##
|
||||
## See also:
|
||||
## * `split iterator<#split.i,string,string,int>`_
|
||||
## * `splitLines iterator<#splitLines.i,string>`_
|
||||
## * `splitWhitespace iterator<#splitWhitespace.i,string,int>`_
|
||||
## * `rsplit func<#rsplit,string,string,int>`_
|
||||
rsplitCommon(s, sep, maxsplit, sep.len)
|
||||
let sepLen = if sep.len == 0: 1 # prevents infinite loop
|
||||
else: sep.len
|
||||
rsplitCommon(s, sep, maxsplit, sepLen)
|
||||
|
||||
iterator splitLines*(s: string, keepEol = false): string =
|
||||
## Splits the string `s` into its containing lines.
|
||||
@@ -708,6 +724,9 @@ func split*(s: string, seps: set[char] = Whitespace, maxsplit: int = -1): seq[
|
||||
## The same as the `split iterator <#split.i,string,set[char],int>`_ (see its
|
||||
## documentation), but is a func that returns a sequence of substrings.
|
||||
##
|
||||
## .. note:: Empty separator set results in returning an original string,
|
||||
## following the interpretation "split by no element".
|
||||
##
|
||||
## See also:
|
||||
## * `split iterator <#split.i,string,set[char],int>`_
|
||||
## * `rsplit func<#rsplit,string,set[char],int>`_
|
||||
@@ -716,6 +735,7 @@ func split*(s: string, seps: set[char] = Whitespace, maxsplit: int = -1): seq[
|
||||
runnableExamples:
|
||||
doAssert "a,b;c".split({',', ';'}) == @["a", "b", "c"]
|
||||
doAssert "".split({' '}) == @[""]
|
||||
doAssert "empty seps return unsplit s".split({}) == @["empty seps return unsplit s"]
|
||||
accResult(split(s, seps, maxsplit))
|
||||
|
||||
func split*(s: string, sep: string, maxsplit: int = -1): seq[string] {.rtl,
|
||||
@@ -725,6 +745,9 @@ func split*(s: string, sep: string, maxsplit: int = -1): seq[string] {.rtl,
|
||||
## Substrings are separated by the string `sep`. This is a wrapper around the
|
||||
## `split iterator <#split.i,string,string,int>`_.
|
||||
##
|
||||
## .. note:: Empty separator string results in returning an original string,
|
||||
## following the interpretation "split by no element".
|
||||
##
|
||||
## See also:
|
||||
## * `split iterator <#split.i,string,string,int>`_
|
||||
## * `rsplit func<#rsplit,string,string,int>`_
|
||||
@@ -737,8 +760,7 @@ func split*(s: string, sep: string, maxsplit: int = -1): seq[string] {.rtl,
|
||||
doAssert "a largely spaced sentence".split(" ") == @["a", "", "largely",
|
||||
"", "", "", "spaced", "sentence"]
|
||||
doAssert "a largely spaced sentence".split(" ", maxsplit = 1) == @["a", " largely spaced sentence"]
|
||||
doAssert(sep.len > 0)
|
||||
|
||||
doAssert "empty sep returns unsplit s".split("") == @["empty sep returns unsplit s"]
|
||||
accResult(split(s, sep, maxsplit))
|
||||
|
||||
func rsplit*(s: string, sep: char, maxsplit: int = -1): seq[string] {.rtl,
|
||||
@@ -788,6 +810,9 @@ func rsplit*(s: string, seps: set[char] = Whitespace,
|
||||
## .. code-block:: nim
|
||||
## @["Root#Object#Method", "Index"]
|
||||
##
|
||||
## .. note:: Empty separator set results in returning an original string,
|
||||
## following the interpretation "split by no element".
|
||||
##
|
||||
## See also:
|
||||
## * `rsplit iterator <#rsplit.i,string,set[char],int>`_
|
||||
## * `split func<#split,string,set[char],int>`_
|
||||
@@ -815,6 +840,9 @@ func rsplit*(s: string, sep: string, maxsplit: int = -1): seq[string] {.rtl,
|
||||
## .. code-block:: nim
|
||||
## @["Root#Object#Method", "Index"]
|
||||
##
|
||||
## .. note:: Empty separator string results in returning an original string,
|
||||
## following the interpretation "split by no element".
|
||||
##
|
||||
## See also:
|
||||
## * `rsplit iterator <#rsplit.i,string,string,int,bool>`_
|
||||
## * `split func<#split,string,string,int>`_
|
||||
@@ -829,6 +857,7 @@ func rsplit*(s: string, sep: string, maxsplit: int = -1): seq[string] {.rtl,
|
||||
doAssert "".rsplit("Elon Musk") == @[""]
|
||||
doAssert "a largely spaced sentence".rsplit(" ") == @["a", "",
|
||||
"largely", "", "", "", "spaced", "sentence"]
|
||||
doAssert "empty sep returns unsplit s".rsplit("") == @["empty sep returns unsplit s"]
|
||||
accResult(rsplit(s, sep, maxsplit))
|
||||
result.reverse()
|
||||
|
||||
|
||||
@@ -150,12 +150,7 @@ block tsplit2:
|
||||
s.add("#")
|
||||
s.add(w)
|
||||
|
||||
var errored = false
|
||||
try:
|
||||
discard "hello".split("")
|
||||
except AssertionDefect:
|
||||
errored = true
|
||||
doAssert errored
|
||||
doAssert "true".split("") == @["true"]
|
||||
|
||||
block txmlgen:
|
||||
var nim = "Nim"
|
||||
|
||||
@@ -26,7 +26,6 @@ Hi Andreas! How do you feel, Rumpf?
|
||||
[2, 3, 4, 5]
|
||||
[2, 3, 4, 5, 6]
|
||||
[1, 2, 3, 4, 5, 6]
|
||||
true
|
||||
<h1><a href="http://force7.de/nim">Nim</a></h1>
|
||||
'''
|
||||
"""
|
||||
@@ -206,11 +205,7 @@ block tsplit2:
|
||||
s.add("#")
|
||||
s.add(w)
|
||||
|
||||
try:
|
||||
discard "hello".split("")
|
||||
echo "false"
|
||||
except AssertionDefect:
|
||||
echo "true"
|
||||
doAssert "true".split("") == @["true"]
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -51,6 +51,15 @@ template main() =
|
||||
doAssert s.split(maxsplit = 4) == @["", "this", "is", "an", "example "]
|
||||
doAssert s.split(' ', maxsplit = 1) == @["", "this is an example "]
|
||||
doAssert s.split(" ", maxsplit = 4) == @["", "this", "is", "an", "example "]
|
||||
# Empty string:
|
||||
doAssert "".split() == @[""]
|
||||
doAssert "".split(" ") == @[""]
|
||||
doAssert "".split({' '}) == @[""]
|
||||
# Empty separators:
|
||||
doAssert "".split({}) == @[""]
|
||||
doAssert "".split("") == @[""]
|
||||
doAssert s.split({}) == @[s]
|
||||
doAssert s.split("") == @[s]
|
||||
|
||||
block: # splitLines
|
||||
let fixture = "a\nb\rc\r\nd"
|
||||
@@ -61,12 +70,21 @@ template main() =
|
||||
block: # rsplit
|
||||
doAssert rsplit("foo bar", seps = Whitespace) == @["foo", "bar"]
|
||||
doAssert rsplit(" foo bar", seps = Whitespace, maxsplit = 1) == @[" foo", "bar"]
|
||||
doAssert rsplit(" foo bar ", seps = Whitespace, maxsplit = 1) == @[
|
||||
" foo bar", ""]
|
||||
doAssert rsplit(" foo bar ", seps = Whitespace, maxsplit = 1) == @[" foo bar", ""]
|
||||
doAssert rsplit(":foo:bar", sep = ':') == @["", "foo", "bar"]
|
||||
doAssert rsplit(":foo:bar", sep = ':', maxsplit = 2) == @["", "foo", "bar"]
|
||||
doAssert rsplit(":foo:bar", sep = ':', maxsplit = 3) == @["", "foo", "bar"]
|
||||
doAssert rsplit("foothebar", sep = "the") == @["foo", "bar"]
|
||||
# Empty string:
|
||||
doAssert "".rsplit() == @[""]
|
||||
doAssert "".rsplit(" ") == @[""]
|
||||
doAssert "".rsplit({' '}) == @[""]
|
||||
# Empty separators:
|
||||
let s = " this is an example "
|
||||
doAssert "".rsplit({}) == @[""]
|
||||
doAssert "".rsplit("") == @[""]
|
||||
doAssert s.rsplit({}) == @[s]
|
||||
doAssert s.rsplit("") == @[s]
|
||||
|
||||
block: # splitWhitespace
|
||||
let s = " this is an example "
|
||||
|
||||
Reference in New Issue
Block a user