make re.split consistent with strutils.split and other programming languages; refs #7278

This commit is contained in:
Andreas Rumpf
2018-03-05 21:39:13 +01:00
parent 5c8332d871
commit e2094bc6f4
2 changed files with 33 additions and 13 deletions

View File

@@ -4,10 +4,16 @@
#### Breaking changes in the standard library
- ``re.split`` for empty regular expressions now yields every character in
the string, which is what other programming languages do.
#### Breaking changes in the compiler
### Library additions
- ``re.split`` now also supports the ``maxsplit`` parameter for consistency
with ``strutils.split``.
### Library changes
### Language additions

View File

@@ -498,7 +498,7 @@ proc transformFile*(infile, outfile: string,
var x = readFile(infile).string
writeFile(outfile, x.multiReplace(subs))
iterator split*(s: string, sep: Regex): string =
iterator split*(s: string, sep: Regex; maxsplit = -1): string =
## Splits the string ``s`` into substrings.
##
## Substrings are separated by the regular expression ``sep``
@@ -520,22 +520,28 @@ iterator split*(s: string, sep: Regex): string =
## "example"
## ""
##
var
first = -1
last = -1
while last < len(s):
var x = matchLen(s, sep, last)
if x > 0: inc(last, x)
first = last
if x == 0: inc(last)
var last = 0
var splits = maxsplit
var x: int
while last <= len(s):
var first = last
var sepLen = 1
while last < len(s):
x = matchLen(s, sep, last)
if x >= 0: break
if x >= 0:
sepLen = x
break
inc(last)
if first <= last:
yield substr(s, first, last-1)
if x == 0:
if last >= len(s): break
inc last
if splits == 0: last = len(s)
yield substr(s, first, last-1)
if splits == 0: break
dec(splits)
inc(last, sepLen)
proc split*(s: string, sep: Regex): seq[string] {.inline.} =
proc split*(s: string, sep: Regex, maxsplit = -1): seq[string] {.inline.} =
## Splits the string ``s`` into a seq of substrings.
##
## The portion matched by ``sep`` is not returned.
@@ -632,6 +638,14 @@ when isMainModule:
accum.add(word)
doAssert(accum == @["AAA", "", "BBB"])
doAssert(split("abc", re"") == @["a", "b", "c"])
doAssert(split("", re"") == @[])
doAssert(split("a;b;c", re";") == @["a", "b", "c"])
doAssert(split(";a;b;c", re";") == @["", "a", "b", "c"])
doAssert(split(";a;b;c;", re";") == @["", "a", "b", "c", ""])
doAssert(split("a;b;c;", re";") == @["a", "b", "c", ""])
for x in findAll("abcdef", re"^{.}", 3):
doAssert x == "d"
accum = @[]