mirror of
https://github.com/nim-lang/Nim.git
synced 2026-01-04 12:07:51 +00:00
make re.split consistent with strutils.split and other programming languages; refs #7278
This commit is contained in:
@@ -4,10 +4,16 @@
|
||||
|
||||
#### Breaking changes in the standard library
|
||||
|
||||
- ``re.split`` for empty regular expressions now yields every character in
|
||||
the string, which is what other programming languages do as well.
|
||||
|
||||
#### Breaking changes in the compiler
|
||||
|
||||
### Library additions
|
||||
|
||||
- ``re.split`` now also supports the ``maxsplit`` parameter for consistency
|
||||
with ``strutils.split``.
|
||||
|
||||
### Library changes
|
||||
|
||||
### Language additions
|
||||
|
||||
@@ -498,7 +498,7 @@ proc transformFile*(infile, outfile: string,
|
||||
var x = readFile(infile).string
|
||||
writeFile(outfile, x.multiReplace(subs))
|
||||
|
||||
iterator split*(s: string, sep: Regex): string =
|
||||
iterator split*(s: string, sep: Regex; maxsplit = -1): string =
|
||||
## Splits the string ``s`` into substrings.
|
||||
##
|
||||
## Substrings are separated by the regular expression ``sep``
|
||||
@@ -520,22 +520,28 @@ iterator split*(s: string, sep: Regex): string =
|
||||
## "example"
|
||||
## ""
|
||||
##
|
||||
var
|
||||
first = -1
|
||||
last = -1
|
||||
while last < len(s):
|
||||
var x = matchLen(s, sep, last)
|
||||
if x > 0: inc(last, x)
|
||||
first = last
|
||||
if x == 0: inc(last)
|
||||
var last = 0
|
||||
var splits = maxsplit
|
||||
var x: int
|
||||
while last <= len(s):
|
||||
var first = last
|
||||
var sepLen = 1
|
||||
while last < len(s):
|
||||
x = matchLen(s, sep, last)
|
||||
if x >= 0: break
|
||||
if x >= 0:
|
||||
sepLen = x
|
||||
break
|
||||
inc(last)
|
||||
if first <= last:
|
||||
yield substr(s, first, last-1)
|
||||
if x == 0:
|
||||
if last >= len(s): break
|
||||
inc last
|
||||
if splits == 0: last = len(s)
|
||||
yield substr(s, first, last-1)
|
||||
if splits == 0: break
|
||||
dec(splits)
|
||||
inc(last, sepLen)
|
||||
|
||||
proc split*(s: string, sep: Regex): seq[string] {.inline.} =
|
||||
proc split*(s: string, sep: Regex, maxsplit = -1): seq[string] {.inline.} =
|
||||
## Splits the string ``s`` into a seq of substrings.
|
||||
##
|
||||
## The portion matched by ``sep`` is not returned.
|
||||
@@ -632,6 +638,14 @@ when isMainModule:
|
||||
accum.add(word)
|
||||
doAssert(accum == @["AAA", "", "BBB"])
|
||||
|
||||
doAssert(split("abc", re"") == @["a", "b", "c"])
|
||||
doAssert(split("", re"") == @[])
|
||||
|
||||
doAssert(split("a;b;c", re";") == @["a", "b", "c"])
|
||||
doAssert(split(";a;b;c", re";") == @["", "a", "b", "c"])
|
||||
doAssert(split(";a;b;c;", re";") == @["", "a", "b", "c", ""])
|
||||
doAssert(split("a;b;c;", re";") == @["a", "b", "c", ""])
|
||||
|
||||
for x in findAll("abcdef", re"^{.}", 3):
|
||||
doAssert x == "d"
|
||||
accum = @[]
|
||||
|
||||
Reference in New Issue
Block a user